docs-v2/helper-scripts/build-agent-instructions.js

#!/usr/bin/env node

/**
 * Script to generate GitHub Copilot instructions
 * for InfluxData documentation.
 */
import fs from 'fs';
import path from 'path';
import process from 'process';
import { execSync } from 'child_process';

// Export the generator so other scripts can import and run it
export { buildContributingInstructions };

// Run the generator as soon as the module is evaluated. Failures are
// reported on stderr instead of being rethrown.
try {
  buildContributingInstructions();
} catch (err) {
  console.error('Error generating agent instructions:', err);
}

/**
 * Build the contributing instructions file from CONTRIBUTING.md.
 *
 * Reads CONTRIBUTING.md from the current working directory, shrinks it with
 * optimizeContentForContext(), prepends frontmatter plus a short preamble,
 * and writes the result to
 * .github/instructions/contributing.instructions.md.
 * Afterwards it tries to stage the generated file, and any changed Markdown
 * files in the instructions directory, with git. Git failures only produce a
 * warning; read and write failures propagate to the caller.
 */
function buildContributingInstructions() {
  // The size warning is phrased in KB and the displayed size is computed in
  // 1024-byte units, so the limit uses the same unit to keep the message
  // consistent with the comparison.
  const maxRecommendedBytes = 40 * 1024;

  // Paths
  const contributingPath = path.join(process.cwd(), 'CONTRIBUTING.md');
  const instructionsDir = path.join(process.cwd(), '.github', 'instructions');
  const instructionsPath = path.join(
    instructionsDir,
    'contributing.instructions.md'
  );

  // With `recursive: true` this is a no-op when the directory already exists,
  // so no separate existence check is needed.
  fs.mkdirSync(instructionsDir, { recursive: true });

  // Read CONTRIBUTING.md and drop or condense the less critical sections
  const optimized = optimizeContentForContext(
    fs.readFileSync(contributingPath, 'utf8')
  );

  // Format the content for Copilot instructions with applyTo attribute
  const instructions = `---
applyTo: "content/**/*.md, layouts/**/*.html"
---

# Contributing instructions for InfluxData Documentation

## Purpose and scope

Help document InfluxData products
by creating clear, accurate technical content with proper
code examples, frontmatter, shortcodes, and formatting.

${optimized}`;

  // Write the formatted content to the instructions file
  fs.writeFileSync(instructionsPath, instructions);

  const fileSize = fs.statSync(instructionsPath).size;
  const sizeInKB = (fileSize / 1024).toFixed(1);
  console.log(
    `✅ Generated instructions at ${instructionsPath} (${sizeInKB}KB)`
  );

  if (fileSize > maxRecommendedBytes) {
    console.warn(
      `⚠️  Instructions file is large (${sizeInKB}KB > 40KB) and may ` +
        `impact performance`
    );
  }

  // Stage the generated files when git reports them as changed. Staging is
  // best-effort: any git error is downgraded to a warning.
  try {
    const gitStatus = execSync(
      `git status --porcelain "${instructionsPath}"`
    ).toString();
    if (gitStatus.trim()) {
      execSync(`git add "${instructionsPath}"`);
      console.log('✅ Added instructions file to git staging');
    }

    // Also add any extracted files to git
    const extractedFiles = execSync(
      `git status --porcelain "${instructionsDir}/*.md"`
    ).toString();
    if (extractedFiles.trim()) {
      execSync(`git add "${instructionsDir}"/*.md`);
      console.log('✅ Added extracted files to git staging');
    }
  } catch (error) {
    console.warn('⚠️  Could not add files to git:', error.message);
  }
}

/**
 * Optimize content for AI agents by processing sections based on tags
 * while preserving essential documentation guidance and structure.
 *
 * The content is split at every `<!-- agent:instruct: ... -->` comment. Each
 * tagged section runs from its tag up to the next tag (or the end of the
 * content) and is handed to the handler for its tag:
 *   - essential      -> cleanupSection
 *   - condense       -> condenseSection
 *   - remove         -> dropped
 *   - extract <file> -> processExtractSection(section, file)
 * Text before the first tag is passed to processUntaggedSection. The
 * concatenated result goes through cleanupFormatting.
 *
 * @param {string} content - Markdown source containing agent:instruct tags
 * @returns {string} The processed Markdown
 */
function optimizeContentForContext(content) {
  const tagRegex =
    /<!-- agent:instruct: (essential|condense|remove|extract\s+\S+) -->/g;
  const matches = [...content.matchAll(tagRegex)];

  // Split content into sections based on agent:instruct tags
  const sections = [];
  let lastIndex = 0;

  matches.forEach((match, i) => {
    // Add untagged content before this tag
    if (match.index > lastIndex) {
      sections.push({
        type: 'untagged',
        content: content.slice(lastIndex, match.index),
      });
    }

    // A section ends where the next tag starts, or at the end of the content
    const endIndex = matches[i + 1]?.index ?? content.length;

    sections.push({
      type: match[1],
      content: content.slice(match.index, endIndex),
    });

    lastIndex = endIndex;
  });

  // Add any remaining untagged content
  if (lastIndex < content.length) {
    sections.push({
      type: 'untagged',
      content: content.slice(lastIndex),
    });
  }

  // Process sections based on their tags
  let processedContent = '';

  for (const section of sections) {
    // tagRegex accepts any whitespace between "extract" and the filename, so
    // the filename is parsed with the same tolerance instead of assuming
    // exactly one space.
    const extractTarget = /^extract\s+(\S+)$/.exec(section.type);
    if (extractTarget) {
      processedContent += processExtractSection(
        section.content,
        extractTarget[1]
      );
      continue;
    }

    switch (section.type) {
      case 'essential':
        processedContent += cleanupSection(section.content);
        break;
      case 'condense':
        processedContent += condenseSection(section.content);
        break;
      case 'remove':
        // Skip these sections entirely
        break;
      default:
        // 'untagged' and anything unrecognized
        processedContent += processUntaggedSection(section.content);
        break;
    }
  }

  // Final cleanup
  return cleanupFormatting(processedContent);
}

/**
 * Light-touch cleanup for sections tagged as essential: every line of the
 * section is kept.
 *
 * @param {string} content - Section text, including its tag comment
 * @returns {string} The section without the tag comment and with runs of
 *   four or more newlines collapsed to three
 */
function cleanupSection(content) {
  const withoutTag = content.replace(
    /<!-- agent:instruct: essential -->\n?/g,
    ''
  );

  return withoutTag.replace(/\n{4,}/g, '\n\n\n');
}

/**
 * Condense a section tagged `condense` down to its key information.
 *
 * Steps:
 *   1. Remove the tag comment.
 *   2. If the section has no Markdown header, return it unchanged otherwise.
 *   3. Shorten fenced code blocks whose body is at least 300 characters to
 *      their first three lines plus a pointer to CONTRIBUTING.md.
 *   4. Keep headers, blank lines, list items, code blocks, and the first two
 *      non-indented prose lines; drop the remaining prose lines.
 *   5. Append a note pointing to the full CONTRIBUTING.md.
 *
 * @param {string} content - Section text, including its tag comment
 * @returns {string} The condensed section
 */
function condenseSection(content) {
  const minBodyLength = 300;
  const keptBlockLines = 3;
  const maxProseLines = 2;

  // Remove the tag comment
  let text = content.replace(/<!-- agent:instruct: condense -->\n?/g, '');

  // Sections without a header are returned as-is
  if (!/^(#+\s+.+)/m.test(text)) return text;

  // Condense very long code examples. Each fence pair is matched first and
  // measured afterwards: putting the length requirement inside the regex
  // would let a short block pair its opening fence with the opening fence of
  // a later block and swallow the prose in between.
  text = text.replace(/```[\s\S]*?```/g, (block) => {
    // 6 = the two three-backtick fences
    if (block.length - 6 < minBodyLength) return block;

    const blockLines = block.split('\n');
    // Nothing to drop except the closing fence: truncating would only append
    // a second, dangling fence.
    if (blockLines.length <= keptBlockLines + 1) return block;

    return [
      ...blockLines.slice(0, keptBlockLines),
      '# ... (see full CONTRIBUTING.md for complete example)',
      '```',
    ].join('\n');
  });

  // Keep structure plus the first few prose lines
  const keptLines = [];
  let inCodeBlock = false;
  let proseLinesKept = 0;

  for (const line of text.split('\n')) {
    if (line.startsWith('```')) {
      inCodeBlock = !inCodeBlock;
      keptLines.push(line);
      continue;
    }

    const isStructural =
      inCodeBlock ||
      line.startsWith('#') ||
      line.trim() === '' ||
      line.startsWith('- ') ||
      line.startsWith('* ') ||
      /^\d+\./.test(line);

    if (isStructural) {
      keptLines.push(line);
    } else if (proseLinesKept < maxProseLines) {
      keptLines.push(line);
      // Indented continuation lines are kept but do not use up the budget
      if (!line.startsWith('  ')) {
        proseLinesKept++;
      }
    }
  }

  return (
    keptLines.join('\n') +
    '\n\n_See full CONTRIBUTING.md for complete details._\n\n'
  );
}

/**
 * Move a section tagged `extract <filename>` into its own instructions file
 * and return a short placeholder for the main document.
 *
 * The section (without its tag comment, with frontmatter prepended) is
 * written to .github/instructions/<filename> under the current working
 * directory. A section without a Markdown header is not extracted: it is
 * returned with only the tag comment removed and no file is written.
 *
 * @param {string} content - Section text, including its tag comment
 * @param {string} filename - Name of the file to write inside the
 *   instructions directory
 * @returns {string} The section header followed by a reference to the
 *   extracted file, or the untagged section when it has no header
 */
function processExtractSection(content, filename) {
  // Remove the tag comment. `\s+` mirrors the whitespace that the tag parser
  // accepts between "extract" and the filename, so no accepted tag variant
  // leaks into the extracted file.
  const body = content.replace(
    /<!-- agent:instruct: extract\s+\S+ -->\n?/g,
    ''
  );

  // Extract section header
  const headerMatch = body.match(/^(#+\s+.+)/m);
  if (!headerMatch) return body;

  const header = headerMatch[1];
  const sectionTitle = header.replace(/^#+\s+/, '');

  // Write the section content to a separate file
  const instructionsDir = path.join(process.cwd(), '.github', 'instructions');
  const extractedFilePath = path.join(instructionsDir, filename);

  // Create the target directory if needed so the write does not depend on a
  // caller having prepared it; a no-op when it already exists.
  fs.mkdirSync(path.dirname(extractedFilePath), { recursive: true });

  // Add frontmatter to the extracted file
  const extractedContent = `---
applyTo: "content/**/*.md, layouts/**/*.html"
---

${body}`;

  fs.writeFileSync(extractedFilePath, extractedContent);

  console.log(`✅ Extracted ${sectionTitle} to ${extractedFilePath}`);

  // Create a placeholder that references the extracted file
  return `${header}\n\n_For the complete ${sectionTitle} reference, see ${filename}._\n\n`;
}

/**
 * Process untagged sections with moderate optimization: prose is left
 * untouched and only very long fenced code blocks are shortened.
 *
 * A fenced block whose body is at least 400 characters is cut to its first
 * five lines (opening fence included), followed by a truncation marker and a
 * closing fence.
 *
 * @param {string} content - Untagged section text
 * @returns {string} The section with long code blocks truncated
 */
function processUntaggedSection(content) {
  const minBodyLength = 400;
  const keptBlockLines = 5;

  // Each fence pair is matched first and measured afterwards: putting the
  // length requirement inside the regex would let a short block pair its
  // opening fence with the opening fence of a later block and swallow the
  // prose in between.
  return content.replace(/```[\s\S]*?```/g, (block) => {
    // 6 = the two three-backtick fences
    if (block.length - 6 < minBodyLength) return block;

    const blockLines = block.split('\n');
    // Nothing to drop except the closing fence: truncating would only append
    // a second, dangling fence.
    if (blockLines.length <= keptBlockLines + 1) return block;

    return [
      ...blockLines.slice(0, keptBlockLines),
      '# ... (content truncated)',
      '```',
    ].join('\n');
  });
}

/**
 * Clean up formatting issues in the processed content.
 *
 * Runs of four or more newlines are collapsed to three and leftover
 * agent:instruct tag comments are removed. The content is then split into
 * fenced code blocks and the prose between them: empty code blocks get a
 * placeholder line, while header and list cleanup is applied to prose only,
 * so lines inside code blocks that merely look like Markdown (for example
 * shell comments starting with `#`) are left alone.
 *
 * @param {string} content - Processed Markdown
 * @returns {string} The cleaned-up Markdown
 */
function cleanupFormatting(content) {
  // Fix multiple consecutive newlines
  let cleaned = content.replace(/\n{4,}/g, '\n\n\n');

  // Remove agent:instruct comments that might remain, including the
  // two-part "extract <filename>" form
  cleaned = cleaned.replace(
    /<!-- agent:instruct: (?:extract\s+\S+|\w+) -->\n?/g,
    ''
  );

  // Splitting on a capturing group keeps the fenced blocks in the result:
  // odd indexes are code blocks, even indexes are the prose around them.
  return cleaned
    .split(/(```[\s\S]*?```)/)
    .map((segment, index) =>
      index % 2 === 1
        ? labelEmptyCodeBlock(segment)
        : cleanupProseSegment(segment)
    )
    .join('');
}

/** Give a fenced block that contains only blank lines a placeholder line. */
function labelEmptyCodeBlock(block) {
  return block.replace(
    /^```(\w*)\n\n+```$/,
    '```$1\n# (empty code block)\n```'
  );
}

/** Remove title-less headers, empty list items, and empty sections from prose. */
function cleanupProseSegment(text) {
  // Fix broken markdown headers
  let prose = text.replace(/^(#+)\s*$/gm, '');

  // Fix broken list formatting
  prose = prose.replace(/^(-|\*|\d+\.)\s*$/gm, '');

  // Remove empty sections: a header directly followed by another header of
  // the same or a higher level. A header followed by a deeper header is a
  // parent section and is kept. Repeat until stable, because removing an
  // empty subsection can leave its parent empty.
  let previous;
  do {
    previous = prose;
    prose = prose.replace(
      /^(#+)\s+.+\n+(?=^(#+)\s+)/gm,
      (match, hashes, nextHashes) => {
        const isHeaderOnly = match.trim().split('\n').length <= 2;
        const nextIsSubsection = nextHashes.length > hashes.length;
        return isHeaderOnly && !nextIsSubsection ? '' : match;
      }
    );
  } while (prose !== previous);

  return prose;
}