feat(ci): add incremental builds and shared content-utils (#6582)
- Incremental Markdown build for PRs, full build for production
- Shared content-utils library for:
  - Mapping shared content to consuming pages (Markdown generation, Cypress)
  - Listing changed content pages (committed, uncommitted, staged)
  - Extracting source frontmatter (docs edit)
- Fix CSS parsing warnings with JSDOM VirtualConsole
- Remove unused imports and variables

fix-prefer-database-over-namespace^2
parent
b95e608d07
commit
4cb455b1ae
|
|
@ -44,7 +44,14 @@ jobs:
|
|||
command: yarn hugo --environment production --logLevel info --gc --destination workspace/public
|
||||
- run:
|
||||
name: Generate LLM-friendly Markdown
|
||||
command: yarn build:md --public-dir workspace/public
|
||||
command: |
|
||||
if [ "$CIRCLE_BRANCH" = "master" ]; then
|
||||
# Full build for production deployments
|
||||
yarn build:md --public-dir workspace/public
|
||||
else
|
||||
# Incremental build for PRs - only process changed files
|
||||
yarn build:md --public-dir workspace/public --only-changed --base-branch origin/master
|
||||
fi
|
||||
- persist_to_workspace:
|
||||
root: workspace
|
||||
paths:
|
||||
|
|
|
|||
|
|
@ -161,6 +161,28 @@ const CONCURRENCY = process.env.CI ? 10 : 20;
|
|||
- **Memory**: \~300MB peak (safe for 2GB CircleCI)
|
||||
- **Rate**: \~23 files/second with memory-bounded parallelism
|
||||
|
||||
## Making Deployment Changes
|
||||
|
||||
### During Initial Implementation
|
||||
|
||||
If making changes that affect `yarn build` commands or `.circleci/config.yml`:
|
||||
|
||||
1. **Read the surrounding context** in the CI file
|
||||
2. **Notice** flags, such as `--destination workspace/public` on the Hugo build
|
||||
3. **Ask**: "Does the build script need to know about environment details--for example, do paths differ between production and staging?"
|
||||
|
||||
### Recommended Prompt for Future Similar Work
|
||||
|
||||
> "This script will run in CI. Let me read the CI configuration to understand the build environment and directory structure before finalizing the implementation."
|
||||
|
||||
## Summary of Recommendations
|
||||
|
||||
| Strategy | Implementation | Effort |
|
||||
| ---------------------------------- | ---------------------------------- | ------ |
|
||||
| Read CI config before implementing | Process/habit change | Low |
|
||||
| Test on feature branch first | Push and watch CI before merging | Low |
|
||||
| Add CI validation step | Add file count check in config.yml | Low |
|
||||
|
||||
## Testing and Validation
|
||||
|
||||
### Local Testing
|
||||
|
|
|
|||
|
|
@ -2,60 +2,22 @@
|
|||
|
||||
import process from 'process';
|
||||
import fs from 'fs';
|
||||
import { execSync } from 'child_process';
|
||||
import matter from 'gray-matter';
|
||||
import { filePathToUrl } from '../../.github/scripts/utils/url-transformer.js';
|
||||
import {
|
||||
findPagesReferencingSharedContent,
|
||||
categorizeContentFiles,
|
||||
} from '../../scripts/lib/content-utils.js';
|
||||
|
||||
// Get file paths from command line arguments
|
||||
const filePaths = process.argv.slice(2).filter((arg) => !arg.startsWith('--'));
|
||||
|
||||
// Parse options
|
||||
const debugMode = process.argv.includes('--debug'); // deprecated, no longer used
|
||||
const jsonMode = process.argv.includes('--json');
|
||||
|
||||
// Separate shared content files and regular content files
|
||||
const sharedContentFiles = filePaths.filter(
|
||||
(file) =>
|
||||
file.startsWith('content/shared/') &&
|
||||
(file.endsWith('.md') || file.endsWith('.html'))
|
||||
);
|
||||
|
||||
const regularContentFiles = filePaths.filter(
|
||||
(file) =>
|
||||
file.startsWith('content/') &&
|
||||
!file.startsWith('content/shared/') &&
|
||||
(file.endsWith('.md') || file.endsWith('.html'))
|
||||
);
|
||||
|
||||
// Find pages that reference shared content files in their frontmatter
|
||||
function findPagesReferencingSharedContent(sharedFilePath) {
|
||||
try {
|
||||
// Remove the leading "content/" to match how it would appear in frontmatter
|
||||
const relativePath = sharedFilePath.replace(/^content\//, '');
|
||||
|
||||
// Use grep to find files that reference this shared content in frontmatter
|
||||
// Look for source: <path> pattern in YAML frontmatter
|
||||
const grepCmd = `grep -l "source: .*${relativePath}" --include="*.md" --include="*.html" -r content/`;
|
||||
|
||||
// Execute grep command and parse results
|
||||
const result = execSync(grepCmd, { encoding: 'utf8' }).trim();
|
||||
|
||||
if (!result) {
|
||||
return [];
|
||||
}
|
||||
|
||||
return result.split('\n').filter(Boolean);
|
||||
} catch (error) {
|
||||
// grep returns non-zero exit code when no matches are found
|
||||
if (error.status === 1) {
|
||||
return [];
|
||||
}
|
||||
console.error(
|
||||
`Error finding references to ${sharedFilePath}: ${error.message}`
|
||||
);
|
||||
return [];
|
||||
}
|
||||
}
|
||||
const { shared: sharedContentFiles, regular: regularContentFiles } =
|
||||
categorizeContentFiles(filePaths);
|
||||
|
||||
/**
|
||||
* Extract source from frontmatter or use the file path as source
|
||||
|
|
|
|||
|
|
@ -13,16 +13,10 @@ import { glob } from 'glob';
|
|||
import fs from 'fs/promises';
|
||||
import { readFileSync } from 'fs';
|
||||
import path from 'path';
|
||||
import { fileURLToPath } from 'url';
|
||||
import { dirname } from 'path';
|
||||
import { createRequire } from 'module';
|
||||
import yaml from 'js-yaml';
|
||||
import pLimit from 'p-limit';
|
||||
|
||||
// Get __dirname equivalent in ESM
|
||||
const __filename = fileURLToPath(import.meta.url);
|
||||
const __dirname = dirname(__filename);
|
||||
|
||||
// Create require function for CommonJS modules
|
||||
const require = createRequire(import.meta.url);
|
||||
const { convertToMarkdown } = require('./lib/markdown-converter.cjs');
|
||||
|
|
@ -44,13 +38,14 @@ const { convertToMarkdown } = require('./lib/markdown-converter.cjs');
|
|||
*/
|
||||
const MIN_HTML_SIZE_BYTES = 1024;
|
||||
|
||||
/**
|
||||
* Approximate character-to-token ratio for estimation.
|
||||
* Used to estimate token count from markdown content length.
|
||||
*
|
||||
* @default 4 - Rough heuristic (4 characters ≈ 1 token)
|
||||
*/
|
||||
const CHARS_PER_TOKEN = 4;
|
||||
// ============================================================================
|
||||
// INCREMENTAL BUILD HELPERS
|
||||
// ============================================================================
|
||||
|
||||
import {
|
||||
getChangedContentFiles,
|
||||
mapContentToPublic,
|
||||
} from './lib/content-utils.js';
|
||||
|
||||
// ============================================================================
|
||||
// PHASE 1: HTML → MARKDOWN CONVERSION
|
||||
|
|
@ -60,17 +55,48 @@ const CHARS_PER_TOKEN = 4;
|
|||
* Phase 1: Convert all HTML files to individual page markdown
|
||||
* Uses memory-bounded parallelism to avoid OOM in CI
|
||||
* @param {string} publicDir - Directory containing Hugo build output
|
||||
* @param {Object} options - Build options
|
||||
* @param {boolean} options.onlyChanged - Only process files changed since base branch
|
||||
* @param {string} options.baseBranch - Base branch for comparison (default: 'origin/master')
|
||||
*/
|
||||
async function buildPageMarkdown(publicDir = 'public') {
|
||||
async function buildPageMarkdown(publicDir = 'public', options = {}) {
|
||||
const { onlyChanged = false, baseBranch = 'origin/master' } = options;
|
||||
|
||||
console.log('📄 Converting HTML to Markdown (individual pages)...\n');
|
||||
const startTime = Date.now();
|
||||
|
||||
// Find all HTML files
|
||||
const htmlFiles = await glob(`${publicDir}/**/index.html`, {
|
||||
let htmlFiles = await glob(`${publicDir}/**/index.html`, {
|
||||
ignore: ['**/node_modules/**', '**/api-docs/**'],
|
||||
});
|
||||
|
||||
console.log(`Found ${htmlFiles.length} HTML files\n`);
|
||||
const totalFiles = htmlFiles.length;
|
||||
console.log(`Found ${totalFiles} HTML files\n`);
|
||||
|
||||
// Filter to only changed files if requested
|
||||
if (onlyChanged) {
|
||||
const changedContentFiles = getChangedContentFiles(baseBranch);
|
||||
|
||||
if (changedContentFiles.length > 0) {
|
||||
const changedHtmlSet = mapContentToPublic(changedContentFiles, publicDir);
|
||||
const filteredFiles = htmlFiles.filter((f) => changedHtmlSet.has(f));
|
||||
|
||||
console.log(
|
||||
`🔄 Incremental build: ${filteredFiles.length}/${totalFiles} files changed since ${baseBranch}\n`
|
||||
);
|
||||
|
||||
if (filteredFiles.length === 0) {
|
||||
console.log(' No matching HTML files found, skipping Phase 1\n');
|
||||
return { converted: 0, skipped: 0, errors: [] };
|
||||
}
|
||||
|
||||
htmlFiles = filteredFiles;
|
||||
} else {
|
||||
console.log(
|
||||
' ⚠️ No changed content files detected, processing all files\n'
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
// Memory-bounded concurrency
|
||||
// CircleCI medium (2GB RAM): 10 workers safe
|
||||
|
|
@ -381,6 +407,8 @@ function parseArgs() {
|
|||
const options = {
|
||||
environment: null,
|
||||
publicDir: 'public',
|
||||
onlyChanged: false,
|
||||
baseBranch: 'origin/master',
|
||||
};
|
||||
|
||||
for (let i = 0; i < args.length; i++) {
|
||||
|
|
@ -388,6 +416,10 @@ function parseArgs() {
|
|||
options.environment = args[++i];
|
||||
} else if (args[i] === '--public-dir' && args[i + 1]) {
|
||||
options.publicDir = args[++i];
|
||||
} else if (args[i] === '--only-changed') {
|
||||
options.onlyChanged = true;
|
||||
} else if (args[i] === '--base-branch' && args[i + 1]) {
|
||||
options.baseBranch = args[++i];
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -412,14 +444,25 @@ async function main() {
|
|||
}
|
||||
|
||||
// Show public directory
|
||||
console.log(`📁 Public directory: ${cliOptions.publicDir}\n`);
|
||||
console.log(`📁 Public directory: ${cliOptions.publicDir}`);
|
||||
|
||||
// Show build mode
|
||||
if (cliOptions.onlyChanged) {
|
||||
console.log(`🔄 Mode: Incremental (comparing to ${cliOptions.baseBranch})`);
|
||||
} else {
|
||||
console.log('📦 Mode: Full build');
|
||||
}
|
||||
|
||||
console.log('');
|
||||
console.log('════════════════════════════════\n');
|
||||
|
||||
const overallStart = Date.now();
|
||||
|
||||
// Phase 1: Generate individual page markdown
|
||||
const pageResults = await buildPageMarkdown(cliOptions.publicDir);
|
||||
const pageResults = await buildPageMarkdown(cliOptions.publicDir, {
|
||||
onlyChanged: cliOptions.onlyChanged,
|
||||
baseBranch: cliOptions.baseBranch,
|
||||
});
|
||||
|
||||
// Phase 2: Build section bundles
|
||||
const sectionResults = await buildSectionBundles(cliOptions.publicDir);
|
||||
|
|
@ -462,3 +505,10 @@ export {
|
|||
combineMarkdown,
|
||||
parseMarkdown,
|
||||
};
|
||||
|
||||
// Re-export content utilities
|
||||
export {
|
||||
getChangedContentFiles,
|
||||
mapContentToPublic,
|
||||
findPagesReferencingSharedContent,
|
||||
} from './lib/content-utils.js';
|
||||
|
|
|
|||
|
|
@ -14,9 +14,10 @@ import { parseArgs } from 'node:util';
|
|||
import process from 'node:process';
|
||||
import { join, dirname } from 'path';
|
||||
import { fileURLToPath } from 'url';
|
||||
import { existsSync, readFileSync } from 'fs';
|
||||
import { existsSync } from 'fs';
|
||||
import { spawn } from 'child_process';
|
||||
import { parseDocumentationURL, urlToFilePaths } from './lib/url-parser.js';
|
||||
import { getSourceFromFrontmatter } from './lib/content-utils.js';
|
||||
|
||||
const __filename = fileURLToPath(import.meta.url);
|
||||
const __dirname = dirname(__filename);
|
||||
|
|
@ -128,24 +129,12 @@ function findFiles(url) {
|
|||
|
||||
/**
|
||||
* Check if file uses shared content
|
||||
* @param {string} filePath - Relative path from repo root
|
||||
* @returns {string|null} Path to shared source file or null
|
||||
*/
|
||||
function checkSharedContent(filePath) {
|
||||
const fullPath = join(REPO_ROOT, filePath);
|
||||
|
||||
if (!existsSync(fullPath)) {
|
||||
return null;
|
||||
}
|
||||
|
||||
const content = readFileSync(fullPath, 'utf8');
|
||||
|
||||
// Check for source: frontmatter
|
||||
const sourceMatch = content.match(/^source:\s*(.+)$/m);
|
||||
if (sourceMatch) {
|
||||
const sourcePath = sourceMatch[1].trim();
|
||||
return `content${sourcePath}`;
|
||||
}
|
||||
|
||||
return null;
|
||||
return getSourceFromFrontmatter(fullPath);
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
|||
|
|
@ -0,0 +1,251 @@
|
|||
/**
|
||||
* Content Utilities Library
|
||||
*
|
||||
* Shared utilities for working with content files, including:
|
||||
* - Detecting changed files via git
|
||||
* - Resolving shared content dependencies
|
||||
* - Mapping content paths to public/URL paths
|
||||
* - Extracting source frontmatter
|
||||
*
|
||||
* Used by:
|
||||
* - scripts/build-llm-markdown.js (incremental builds)
|
||||
* - scripts/docs-edit.js (opening shared source files)
|
||||
* - cypress/support/map-files-to-urls.js (test file mapping)
|
||||
*/
|
||||
|
||||
import { execFileSync, execSync } from 'child_process';
import { existsSync, readFileSync } from 'fs';
|
||||
|
||||
/**
 * Find pages that reference a shared content file via `source:` frontmatter.
 *
 * Runs `grep -l` recursively over `content/` (relative to the current working
 * directory) looking for lines that contain `source: ` followed by the shared
 * file's path. The path is matched literally: regex metacharacters in it are
 * escaped, and grep is invoked without a shell so the path cannot be
 * interpreted as shell syntax.
 *
 * @param {string} sharedFilePath - Path to shared file (e.g., 'content/shared/sql-reference/_index.md')
 * @returns {string[]} Content file paths (as printed by grep) that reference
 *   this shared file; empty when nothing matches or grep fails
 */
export function findPagesReferencingSharedContent(sharedFilePath) {
  // Remove leading "content/" to match frontmatter format (source: /shared/...)
  const relativePath = sharedFilePath.replace(/^content\//, '');

  // Escape the characters that are special in grep's basic regular expressions
  // so that, for example, "." in a filename does not match any character.
  const literalPath = relativePath.replace(/[\\^$.*[]/g, '\\$&');
  const pattern = `source: .*${literalPath}`;

  try {
    // Argument array + execFileSync: no shell is involved, so quotes, "$" or
    // backticks in the path are passed to grep verbatim.
    // Both .md and .html files are searched for compatibility.
    const result = execFileSync(
      'grep',
      [
        '-l',
        '-e',
        pattern,
        '--include=*.md',
        '--include=*.html',
        '-r',
        'content/',
      ],
      {
        encoding: 'utf-8',
        stdio: ['pipe', 'pipe', 'pipe'],
      }
    ).trim();

    return result ? result.split('\n').filter(Boolean) : [];
  } catch (err) {
    // grep returns exit code 1 when no matches found
    if (err.status === 1) {
      return [];
    }
    console.warn(
      ` ⚠️ Error finding references to ${sharedFilePath}: ${err.message}`
    );
    return [];
  }
}
|
||||
|
||||
/**
 * Replace changed shared-content files with the pages that consume them.
 *
 * Files outside `content/shared/` are kept as-is (in input order). Every file
 * under `content/shared/` is dropped from the result and substituted by the
 * pages returned from `findPagesReferencingSharedContent`. Duplicates are
 * removed.
 *
 * @param {string[]} changedFiles - Array of changed file paths
 * @param {Object} options - Options
 * @param {boolean} options.verbose - Log details about shared content resolution
 * @returns {string[]} De-duplicated list of regular files plus referencing pages
 */
export function expandSharedContentChanges(changedFiles, options = {}) {
  const { verbose = false } = options;

  // Single pass: regular files go straight into the result set, shared files
  // are queued for lookup.
  const affected = new Set();
  const sharedChanges = [];
  for (const changedPath of changedFiles) {
    if (changedPath.startsWith('content/shared/')) {
      sharedChanges.push(changedPath);
    } else {
      affected.add(changedPath);
    }
  }

  // Nothing shared changed: the regular files are the whole answer.
  if (sharedChanges.length === 0) {
    return [...affected];
  }

  if (verbose) {
    console.log(
      ` 📎 Found ${sharedChanges.length} shared content changes, finding referencing pages...`
    );
  }

  for (const sharedPath of sharedChanges) {
    const consumers = findPagesReferencingSharedContent(sharedPath);
    if (consumers.length === 0) {
      continue;
    }
    if (verbose) {
      console.log(` ${sharedPath} → ${consumers.length} pages`);
    }
    for (const consumer of consumers) {
      affected.add(consumer);
    }
  }

  return [...affected];
}
|
||||
|
||||
/**
 * Run one `git diff --name-only <diffArgs> -- content/` and return its paths.
 *
 * A failing git call is treated as "no paths from this comparison" so the
 * other comparisons can still contribute, but the failure is reported when
 * `verbose` is set instead of being dropped silently.
 *
 * @param {string} diffArgs - Revision/flag portion of the diff command
 * @param {boolean} verbose - Warn on failure
 * @returns {string[]} Paths printed by git, or [] when the command fails
 */
function listGitDiffPaths(diffArgs, verbose) {
  try {
    const output = execSync(`git diff --name-only ${diffArgs} -- content/`, {
      encoding: 'utf-8',
      stdio: ['pipe', 'pipe', 'pipe'],
    });
    return output.trim().split('\n').filter(Boolean);
  } catch (err) {
    if (verbose) {
      const reason = String(err.stderr || err.message).trim();
      console.warn(` ⚠️ git diff ${diffArgs} failed: ${reason}`);
    }
    return [];
  }
}

/**
 * Get list of content files that changed compared to base branch.
 *
 * Combines three comparisons, each limited to `content/`:
 *   1. `<baseBranch>...HEAD` - committed changes since the merge base
 *   2. `HEAD`                - uncommitted changes in the working tree
 *   3. `--cached`            - staged changes
 * The union is then passed through `expandSharedContentChanges` so that pages
 * referencing changed shared content are included.
 *
 * @param {string} baseBranch - Branch to compare against (e.g., 'origin/master')
 * @param {Object} options - Options
 * @param {boolean} options.verbose - Log details about change detection
 *   (defaults to true)
 * @returns {string[]} Array of changed content file paths; [] when detection
 *   fails, which callers treat as "fall back to a full build"
 */
export function getChangedContentFiles(baseBranch, options = {}) {
  const { verbose = true } = options;

  try {
    const comparisons = [
      // Committed changes; fails if baseBranch is not available locally
      `${baseBranch}...HEAD`,
      // Uncommitted changes (staged + unstaged) in working tree
      'HEAD',
      // Staged changes
      '--cached',
    ];

    const allChangedFiles = new Set();
    for (const diffArgs of comparisons) {
      for (const changedPath of listGitDiffPaths(diffArgs, verbose)) {
        allChangedFiles.add(changedPath);
      }
    }

    // Expand to include pages referencing changed shared content
    return expandSharedContentChanges(Array.from(allChangedFiles), { verbose });
  } catch (err) {
    console.warn(` ⚠️ Could not detect changed files: ${err.message}`);
    return []; // Fall back to full build
  }
}
|
||||
|
||||
/**
 * Map content file paths to their corresponding public HTML paths.
 *
 * Each Markdown file `content/<path>.md` maps to
 * `<publicDir>/<path>/index.html`. Bundle index files map to their directory:
 * `_index.md` (section/branch bundle) and `index.md` (leaf bundle) both
 * resolve to `<publicDir>/<dir>/index.html`, including at the content root
 * (`content/_index.md` → `<publicDir>/index.html`). Non-`.md` entries are
 * ignored.
 *
 * @param {string[]} contentFiles - Array of content file paths (e.g., 'content/influxdb3/core/page.md')
 * @param {string} publicDir - Public directory (e.g., 'public' or 'workspace/public')
 * @returns {Set<string>} Set of public HTML file paths
 */
export function mapContentToPublic(contentFiles, publicDir) {
  const htmlPaths = new Set();

  for (const file of contentFiles) {
    // Only process markdown files
    if (!file.endsWith('.md')) continue;

    const urlPath = file
      // Remove content/ prefix and .md extension
      .replace(/^content\//, '')
      .replace(/\.md$/, '')
      // Bundle index files render at their directory's URL. Anchoring on
      // "start or slash" also covers the site root, where no slash precedes
      // the filename, while leaving names like "reindex" untouched.
      .replace(/(^|\/)_?index$/, '');

    // Build public HTML path; collapse duplicate slashes (empty urlPath or a
    // publicDir that already ends with "/")
    const htmlPath = `${publicDir}/${urlPath}/index.html`.replace(/\/+/g, '/');
    htmlPaths.add(htmlPath);
  }

  return htmlPaths;
}
|
||||
|
||||
/**
 * Split a list of paths into shared-content files and regular content files.
 *
 * Only `.md` and `.html` files under `content/` are kept; anything else is
 * discarded. Files under `content/shared/` go to `shared`, the rest to
 * `regular`. Input order is preserved within each list.
 *
 * @param {string[]} files - Array of file paths
 * @returns {{shared: string[], regular: string[]}} Categorized files
 */
export function categorizeContentFiles(files) {
  const shared = [];
  const regular = [];

  for (const candidate of files) {
    const isPageFile = candidate.endsWith('.md') || candidate.endsWith('.html');
    if (!isPageFile || !candidate.startsWith('content/')) {
      continue;
    }

    if (candidate.startsWith('content/shared/')) {
      shared.push(candidate);
    } else {
      regular.push(candidate);
    }
  }

  return { shared, regular };
}
|
||||
|
||||
/**
 * Extract the `source:` path from a file's YAML frontmatter.
 * Used to find the shared content file that a page includes.
 *
 * Only the leading `---` … `---` frontmatter block is inspected, so a
 * `source:` line in the page body (for example inside a code sample) is not
 * mistaken for the page's source. Surrounding single or double quotes on the
 * value are removed. A value starting with `/` is normalized to the
 * `content/...` form; any other value is returned unchanged.
 *
 * @param {string} filePath - Path to the content file
 * @returns {string|null} The source path (e.g., 'content/shared/sql-reference/_index.md'),
 *   or null when the file is missing, unreadable, has no frontmatter, or has
 *   no non-empty `source:` entry
 */
export function getSourceFromFrontmatter(filePath) {
  if (!existsSync(filePath)) {
    return null;
  }

  try {
    const content = readFileSync(filePath, 'utf8');

    // Isolate the frontmatter block at the very start of the file (optional
    // BOM, LF or CRLF line endings).
    const frontmatter = content.match(
      /^\uFEFF?---[ \t]*\r?\n([\s\S]*?)\r?\n---[ \t]*(?:\r?\n|$)/
    );
    if (!frontmatter) {
      return null;
    }

    // Quick regex check for source: (avoids full YAML parsing). Only spaces
    // and tabs may separate the key from its value so an empty `source:` does
    // not swallow the following line.
    const sourceMatch = frontmatter[1].match(/^source:[ \t]*(.+)$/m);
    if (!sourceMatch) {
      return null;
    }

    // Strip matching surrounding quotes: source: "/shared/x.md"
    const sourcePath = sourceMatch[1].trim().replace(/^(['"])(.*)\1$/, '$2');
    if (!sourcePath) {
      return null;
    }

    // Normalize to content/ prefix format
    return sourcePath.startsWith('/') ? `content${sourcePath}` : sourcePath;
  } catch {
    return null;
  }
}
|
||||
|
|
@ -10,9 +10,7 @@
|
|||
*/
|
||||
|
||||
const TurndownService = require('turndown');
|
||||
const { JSDOM } = require('jsdom');
|
||||
const path = require('path');
|
||||
const fs = require('fs');
|
||||
const { JSDOM, VirtualConsole } = require('jsdom');
|
||||
const yaml = require('js-yaml');
|
||||
|
||||
// Try to load Rust converter (10x faster), fall back to JavaScript
|
||||
|
|
@ -167,7 +165,7 @@ function createTurndownService() {
|
|||
node.firstChild.nodeName === 'CODE'
|
||||
);
|
||||
},
|
||||
replacement: function (content, node, options) {
|
||||
replacement: function (_content, node, options) {
|
||||
const code = node.firstChild;
|
||||
const language = code.className.replace(/^language-/, '') || '';
|
||||
const fence = options.fence;
|
||||
|
|
@ -204,7 +202,7 @@ function createTurndownService() {
|
|||
// Convert HTML tables to Markdown tables
|
||||
turndownService.addRule('tables', {
|
||||
filter: 'table',
|
||||
replacement: function (content, node) {
|
||||
replacement: function (_content, node) {
|
||||
// Get all rows from tbody and thead
|
||||
const theadRows = Array.from(node.querySelectorAll('thead tr'));
|
||||
const tbodyRows = Array.from(node.querySelectorAll('tbody tr'));
|
||||
|
|
@ -303,7 +301,17 @@ function createTurndownService() {
|
|||
* @returns {Object|null} Object with title, description, content or null if not found
|
||||
*/
|
||||
function extractArticleContent(htmlContent, contextInfo = '') {
|
||||
const dom = new JSDOM(htmlContent);
|
||||
// Create a virtual console to suppress CSS parsing errors
|
||||
// JSDOM attempts to parse stylesheets which is unnecessary for markdown conversion
|
||||
const virtualConsole = new VirtualConsole();
|
||||
// Optionally forward errors to console for debugging (commented out to suppress)
|
||||
// virtualConsole.sendTo(console, { omitJSDOMErrors: true });
|
||||
|
||||
const dom = new JSDOM(htmlContent, {
|
||||
virtualConsole,
|
||||
// Don't load external resources (stylesheets, scripts, images)
|
||||
resources: 'usable',
|
||||
});
|
||||
const document = dom.window.document;
|
||||
|
||||
try {
|
||||
|
|
|
|||
Loading…
Reference in New Issue