fix(ci): add --public-dir argument to build-llm-markdown.js (#6581)

CircleCI builds the Hugo site into workspace/public, but the markdown
generator was hardcoded to read from public/. This change adds a
--public-dir argument:

- Default: public (for local dev and staging)
- CI: --public-dir workspace/public

Staging deployment (deploy-staging.sh) uses the default public/ directory
and continues to work unchanged.
pull/6577/head^2
Jason Stirnaman 2025-12-01 14:19:01 -05:00 committed by GitHub
parent c2093c8212
commit 2dd1956a18
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 24 additions and 13 deletions

View File

@ -44,7 +44,7 @@ jobs:
command: yarn hugo --environment production --logLevel info --gc --destination workspace/public
- run:
name: Generate LLM-friendly Markdown
command: yarn build:md
command: yarn build:md --public-dir workspace/public
- persist_to_workspace:
root: workspace
paths:

View File

@ -59,13 +59,14 @@ const CHARS_PER_TOKEN = 4;
/**
* Phase 1: Convert all HTML files to individual page markdown
* Uses memory-bounded parallelism to avoid OOM in CI
* @param {string} publicDir - Directory containing Hugo build output
*/
async function buildPageMarkdown() {
async function buildPageMarkdown(publicDir = 'public') {
console.log('📄 Converting HTML to Markdown (individual pages)...\n');
const startTime = Date.now();
// Find all HTML files
const htmlFiles = await glob('public/**/index.html', {
const htmlFiles = await glob(`${publicDir}/**/index.html`, {
ignore: ['**/node_modules/**', '**/api-docs/**'],
});
@ -98,8 +99,9 @@ async function buildPageMarkdown() {
const html = await fs.readFile(htmlPath, 'utf-8');
// Derive URL path for frontmatter
// Remove publicDir prefix (handles both 'public' and 'workspace/public')
const urlPath = htmlPath
.replace(/^public/, '')
.replace(new RegExp(`^${publicDir}`), '')
.replace(/\/index\.html$/, '/');
// Convert to markdown (JSDOM + Turndown processing)
@ -166,13 +168,14 @@ async function buildPageMarkdown() {
/**
* Phase 2: Build section bundles by combining individual markdown files
* Fast string concatenation with minimal memory usage
* @param {string} publicDir - Directory containing Hugo build output
*/
async function buildSectionBundles() {
async function buildSectionBundles(publicDir = 'public') {
console.log('📦 Building section bundles...\n');
const startTime = Date.now();
// Find all sections (directories with index.md + child index.md files)
const sections = await findSections();
const sections = await findSections(publicDir);
console.log(`Found ${sections.length} sections\n`);
@ -236,10 +239,12 @@ async function buildSectionBundles() {
/**
* Find all sections (parent pages with child pages)
* @param {string} publicDir - Directory containing Hugo build output
*/
async function findSections() {
const allMdFiles = await glob('public/**/index.md');
async function findSections(publicDir = 'public') {
const allMdFiles = await glob(`${publicDir}/**/index.md`);
const sections = [];
const publicDirRegex = new RegExp(`^${publicDir}`);
for (const mdPath of allMdFiles) {
const dir = path.dirname(mdPath);
@ -251,10 +256,10 @@ async function findSections() {
sections.push({
mdPath: mdPath,
url: dir.replace(/^public/, '') + '/',
url: dir.replace(publicDirRegex, '') + '/',
children: childMdFiles.map((childMdPath) => ({
mdPath: childMdPath,
url: path.dirname(childMdPath).replace(/^public/, '') + '/',
url: path.dirname(childMdPath).replace(publicDirRegex, '') + '/',
title: extractTitleFromMd(childMdPath),
})),
});
@ -375,11 +380,14 @@ function parseArgs() {
const args = process.argv.slice(2);
const options = {
environment: null,
publicDir: 'public',
};
for (let i = 0; i < args.length; i++) {
if ((args[i] === '-e' || args[i] === '--env') && args[i + 1]) {
options.environment = args[++i];
} else if (args[i] === '--public-dir' && args[i + 1]) {
options.publicDir = args[++i];
}
}
@ -400,18 +408,21 @@ async function main() {
// Show environment if specified
if (cliOptions.environment) {
console.log(`🌍 Environment: ${cliOptions.environment}\n`);
console.log(`🌍 Environment: ${cliOptions.environment}`);
}
// Show public directory
console.log(`📁 Public directory: ${cliOptions.publicDir}\n`);
console.log('════════════════════════════════\n');
const overallStart = Date.now();
// Phase 1: Generate individual page markdown
const pageResults = await buildPageMarkdown();
const pageResults = await buildPageMarkdown(cliOptions.publicDir);
// Phase 2: Build section bundles
const sectionResults = await buildSectionBundles();
const sectionResults = await buildSectionBundles(cliOptions.publicDir);
// Summary
const totalDuration = ((Date.now() - overallStart) / 1000).toFixed(1);