/**
 * Markdown Converter Library
 *
 * Core conversion logic for transforming HTML to Markdown.
 * This library is used by both:
 * - docs-v2 build scripts (html-to-markdown.js)
 * - docs-tooling Lambda@Edge function
 *
 * Exports reusable functions for HTML→Markdown conversion
 */

const TurndownService = require('turndown');
const { JSDOM } = require('jsdom');
const path = require('path');
const fs = require('fs');
const yaml = require('js-yaml');

// Debug mode: when true, extractArticleContent() prints "[DEBUG]" diagnostics
// through console.log for every document it inspects.
// NOTE(review): this is hard-coded to true, so the verbose logging is always
// on for every consumer of this library — confirm that is intended.
const DEBUG = true;

// Product data cache: starts as null, is filled by
// ensureProductDataInitialized(), and is read by getProductFromPath().
let productsData = null;

/**
 * Initialize product data from YAML file
 *
 * Loads data/products.yml (resolved relative to this file) into the
 * module-level `productsData` cache. Once the cache holds a value, later calls
 * return immediately. A missing, empty, or unparsable file leaves the cache as
 * an empty object, so the file system is not probed again on every conversion.
 *
 * The function is declared `async` so callers can await it; the file read
 * itself is synchronous.
 *
 * @returns {Promise<void>} Resolves once the cache has been populated
 */
async function ensureProductDataInitialized() {
  if (productsData) {
    return;
  }

  // Path to products.yml from this file (scripts/lib/markdown-converter.js)
  const productsPath = path.join(__dirname, '../../data/products.yml');

  try {
    // Read directly instead of existsSync + read: one syscall fewer and no
    // window in which the file can disappear between the check and the read.
    const fileContents = fs.readFileSync(productsPath, 'utf8');
    const parsed = yaml.load(fileContents);

    // yaml.load yields undefined for an empty document and a scalar for a
    // scalar-only document; only a mapping/sequence is usable as product data.
    productsData = parsed !== null && typeof parsed === 'object' ? parsed : {};
  } catch (err) {
    // A missing products.yml is tolerated silently; any other failure
    // (permissions, YAML syntax errors, ...) is reported.
    if (err.code !== 'ENOENT') {
      console.warn('Failed to load products.yml:', err.message);
    }
    productsData = {}; // fallback to empty object
  }
}

/**
 * Get product info from URL path
 *
 * Walks the loaded product data in definition order and returns the first
 * product whose `url_path` (with one trailing slash removed) is a prefix of
 * `urlPath` that ends on a path boundary: the URL path must either equal the
 * prefix or continue with `/`, `?` or `#`. This keeps `/influxdb/cloud` from
 * claiming `/influxdb/cloud-serverless/...`.
 *
 * Entries that are empty (`key:` with no value in the YAML) or that have no
 * string `url_path` are skipped.
 *
 * @param {string} urlPath - URL path of the page being converted
 * @returns {{key: string, name: *, version: *, description: *}|null}
 *   Product summary, or null when product data is not loaded, `urlPath` is not
 *   a string, or no product matches
 */
function getProductFromPath(urlPath) {
  if (!productsData || typeof urlPath !== 'string') {
    return null;
  }

  // Match URL patterns to products
  // Based on patterns from product-mappings.ts
  for (const [key, product] of Object.entries(productsData)) {
    const productUrlPath = product?.url_path;
    if (typeof productUrlPath !== 'string' || productUrlPath === '') continue;

    const pathPattern = productUrlPath.replace(/\/$/, ''); // remove trailing slash
    if (!urlPath.startsWith(pathPattern)) continue;

    // Require the match to end at a segment boundary, not mid-segment.
    const remainder = urlPath.slice(pathPattern.length);
    if (remainder !== '' && !/^[/?#]/.test(remainder)) continue;

    return {
      key,
      name: product.name,
      version: product.version,
      description: product.description,
    };
  }

  return null;
}

/**
 * Resolve the product context for a URL path.
 *
 * Public entry point that hands the path to getProductFromPath() and passes
 * its result back unchanged.
 */
function detectProduct(urlPath) {
  const product = getProductFromPath(urlPath);
  return product;
}

/**
 * Configure Turndown for InfluxData documentation
 *
 * Builds a TurndownService with ATX headings, fenced code blocks and `-`
 * bullets, then registers custom rules for:
 * - fenced code blocks (keeps the `language-*` identifier, lengthens the fence
 *   when the code itself contains a fence line)
 * - list items (spacing and numbering)
 * - tables (pipe tables built from cell text)
 * - callout blockquotes (`> [!Note]` style)
 * and removes navigation, scripts and feedback widgets from the output.
 *
 * @returns {TurndownService} Configured converter instance
 */
function createTurndownService() {
  const turndownService = new TurndownService({
    headingStyle: 'atx',
    codeBlockStyle: 'fenced',
    fence: '```',
    emDelimiter: '*',
    strongDelimiter: '**',
    // linkStyle is deliberately left unset. Turndown only recognizes
    // 'inlined' (its default) and 'referenced'; 'inline' is not a valid value
    // and breaks link conversion.
    bulletListMarker: '-',
  });

  // Preserve code block language identifiers
  turndownService.addRule('fencedCodeBlock', {
    filter: function (node, options) {
      return (
        options.codeBlockStyle === 'fenced' &&
        node.nodeName === 'PRE' &&
        node.firstChild &&
        node.firstChild.nodeName === 'CODE'
      );
    },
    replacement: function (content, node, options) {
      const code = node.firstChild;
      const codeText = code.textContent;

      // Pick only the `language-xxx` token; other classes on <code> (for
      // example highlighter classes) must not leak into the info string.
      const languageMatch = (code.className || '').match(
        /(?:^|\s)language-(\S+)/
      );
      const language = languageMatch ? languageMatch[1] : '';

      // If the code contains a line that looks like a fence, use a longer
      // fence so the block is not terminated early.
      const fenceChar = options.fence.charAt(0);
      const escapedFenceChar = fenceChar.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
      const fenceRunPattern = new RegExp(
        `^[ \\t]*(${escapedFenceChar}{3,})`,
        'gm'
      );
      let fenceSize = options.fence.length;
      for (const match of codeText.matchAll(fenceRunPattern)) {
        if (match[1].length >= fenceSize) {
          fenceSize = match[1].length + 1;
        }
      }
      const fence =
        fenceSize > options.fence.length
          ? fenceChar.repeat(fenceSize)
          : options.fence;

      return `\n\n${fence}${language}\n${codeText}\n${fence}\n\n`;
    },
  });

  // Improve list item handling - ensure proper spacing
  turndownService.addRule('listItems', {
    filter: 'li',
    replacement: function (content, node, options) {
      content = content
        .replace(/^\n+/, '') // Remove leading newlines
        .replace(/\n+$/, '\n') // Single trailing newline
        .replace(/\n/gm, '\n    '); // Indent nested content

      let prefix = options.bulletListMarker + '   '; // Dash + 3 spaces for unordered lists
      const parent = node.parentNode;

      if (parent.nodeName === 'OL') {
        // Honor <ol start="N">; otherwise number from 1 by sibling position.
        const start = parent.getAttribute('start');
        const index = Array.prototype.indexOf.call(parent.children, node);
        prefix = (start ? Number(start) + index : index + 1) + '. ';
      }

      return (
        prefix +
        content +
        (node.nextSibling && !/\n$/.test(content) ? '\n' : '')
      );
    },
  });

  // Convert HTML tables to Markdown tables
  turndownService.addRule('tables', {
    filter: 'table',
    replacement: function (content, node) {
      // Cell text on one line, with literal pipes escaped so they are not
      // read as column separators.
      const cellText = (cell) =>
        cell.textContent.trim().replace(/\n/g, ' ').replace(/\|/g, '\\|');

      // Get all rows from tbody and thead
      const theadRows = Array.from(node.querySelectorAll('thead tr'));
      const tbodyRows = Array.from(node.querySelectorAll('tbody tr'));

      // If no thead/tbody, fall back to all tr elements
      const allRows =
        theadRows.length || tbodyRows.length
          ? [...theadRows, ...tbodyRows]
          : Array.from(node.querySelectorAll('tr'));

      if (allRows.length === 0) return '';

      // Extract headers from first row
      const headerRow = allRows[0];
      const headers = Array.from(headerRow.querySelectorAll('th, td')).map(
        cellText
      );

      // Build separator row
      const separator = headers.map(() => '---').join(' | ');

      // Extract data rows (skip first row which is the header)
      const dataRows = allRows
        .slice(1)
        .map((row) => {
          const cells = Array.from(row.querySelectorAll('td, th')).map(
            cellText
          );
          return '| ' + cells.join(' | ') + ' |';
        })
        .join('\n');

      return (
        '\n| ' +
        headers.join(' | ') +
        ' |\n| ' +
        separator +
        ' |\n' +
        dataRows +
        '\n\n'
      );
    },
  });

  // Handle GitHub-style callouts (notes, warnings, etc.)
  const calloutTypes = ['note', 'warning', 'important', 'tip', 'caution'];
  turndownService.addRule('githubCallouts', {
    filter: function (node) {
      return (
        node.nodeName === 'BLOCKQUOTE' &&
        node.classList &&
        calloutTypes.some((type) => node.classList.contains(type))
      );
    },
    replacement: function (content, node) {
      const type = Array.from(node.classList).find((c) =>
        calloutTypes.includes(c)
      );
      const label =
        {
          note: 'Note',
          warning: 'Warning',
          caution: 'Caution',
          important: 'Important',
          tip: 'Tip',
        }[type] || 'Note';

      return `\n> [!${label}]\n> ${content.trim().replace(/\n/g, '\n> ')}\n\n`;
    },
  });

  // Remove navigation, footer, and other non-content elements.
  // String filters are compared against tag names only, so this list must
  // contain tag names and nothing else.
  turndownService.remove([
    'nav',
    'header',
    'footer',
    'script',
    'style',
    'noscript',
    'iframe',
  ]);

  // Class- and id-based removals need a function filter.
  turndownService.remove(function (node) {
    return (
      node.id === 'page-feedback' || // Remove feedback modal
      Boolean(
        node.classList &&
          (node.classList.contains('format-selector') || // Remove format selector buttons (Copy page, etc.)
            node.classList.contains('page-feedback')) // Remove page feedback form
      )
    );
  });

  return turndownService;
}

/**
 * Pull the main article out of a rendered HTML page.
 *
 * Parses the HTML with JSDOM, locates `article.article--content`, strips
 * feedback/format widgets and `<hr>` elements from it, and collects the page
 * title and description from the surrounding document. The JSDOM window is
 * always closed before returning.
 *
 * @param {string} htmlContent - Raw HTML content
 * @param {string} contextInfo - Context info for log messages (file path or URL)
 * @returns {Object|null} `{ title, description, content }`, where `content` is
 *   the article's inner HTML, or null when the article element is absent
 */
function extractArticleContent(htmlContent, contextInfo = '') {
  const { window } = new JSDOM(htmlContent);
  const { document } = window;

  // Trimmed text of the first element matching `selector`, if any.
  const textOf = (selector) =>
    document.querySelector(selector)?.textContent?.trim();
  // `content` attribute of the first element matching `selector`, if any.
  const metaContent = (selector) =>
    document.querySelector(selector)?.getAttribute('content');

  try {
    const article = document.querySelector('article.article--content');

    if (DEBUG) {
      console.log(`[DEBUG] Looking for article in ${contextInfo}`);
      console.log(`[DEBUG] HTML length: ${htmlContent.length}`);
      console.log(`[DEBUG] Article found: ${!!article}`);
    }

    if (!article) {
      if (DEBUG) {
        // Probe looser selectors to show which half of the selector failed.
        const anyArticle = document.querySelector('article');
        const articleContent = document.querySelector('.article--content');
        console.log(`[DEBUG] Any article element: ${!!anyArticle}`);
        console.log(`[DEBUG] .article--content element: ${!!articleContent}`);
      }

      console.warn(
        `  ⚠️  No article content found in ${contextInfo}. This is typically not a problem and represents an aliased path.`
      );
      return null;
    }

    // Selectors for elements that must not reach the Markdown output.
    const unwantedSelectors = [
      '.format-selector', // format selector buttons
      '.page-feedback', // page feedback form
      '#page-feedback', // feedback modal
      '.feedback-widget', // any feedback widgets
      '.helpful', // "Was this page helpful?" section
      '.feedback.block', // footer feedback/support section
      'hr', // horizontal rules (often used as separators before footer)
    ];

    for (const selector of unwantedSelectors) {
      for (const unwanted of Array.from(article.querySelectorAll(selector))) {
        unwanted.remove();
      }
    }

    // Metadata comes from the whole document, not just the article.
    const title = textOf('h1') || textOf('title') || 'Untitled';
    const description =
      metaContent('meta[name="description"]') ||
      metaContent('meta[property="og:description"]') ||
      '';

    // Serialize the article while the window is still open.
    return {
      title,
      description,
      content: article.innerHTML,
    };
  } finally {
    // Release JSDOM resources to prevent memory leaks.
    window.close();
  }
}

/**
 * Render a value as a single-line YAML scalar for hand-built frontmatter.
 *
 * Numbers and booleans are emitted as-is. Strings stay unquoted only when that
 * is unambiguous: they start with a letter or `/`, contain no control
 * characters, no `: ` or ` #` sequence, do not end with whitespace or `:`, and
 * are not a YAML boolean/null keyword. Everything else is emitted as a
 * double-quoted scalar via JSON.stringify, which YAML accepts.
 *
 * @param {*} value - Value to render
 * @returns {string} Text that is safe to place after `key: `
 */
function formatFrontmatterValue(value) {
  if (typeof value === 'number' || typeof value === 'boolean') {
    return String(value);
  }

  const text = String(value);
  const isPlainSafe =
    /^[A-Za-z/]/.test(text) &&
    !/[\u0000-\u001f\u007f\u0085\u2028\u2029]/.test(text) &&
    !/: | #/.test(text) &&
    !/[\s:]$/.test(text) &&
    !/^(?:true|false|yes|no|on|off|null|y|n)$/i.test(text);

  return isPlainSafe ? text : JSON.stringify(text);
}

/**
 * Generate frontmatter for markdown file (single page)
 *
 * Emits `title`, optional `description`, `url`, and — when detectProduct()
 * recognizes the URL path — `product` and optional `version`. Values are
 * quoted when needed so that titles or descriptions containing `: `, `#`,
 * quotes or line breaks still produce valid YAML.
 *
 * @param {Object} metadata - Object with title, description
 * @param {string} urlPath - URL path for the page
 * @returns {string} YAML frontmatter as string
 */
function generateFrontmatter(metadata, urlPath) {
  const product = detectProduct(urlPath);
  const frontmatter = ['---'];

  frontmatter.push(`title: ${formatFrontmatterValue(metadata.title)}`);
  if (metadata.description) {
    frontmatter.push(
      `description: ${formatFrontmatterValue(metadata.description)}`
    );
  }
  frontmatter.push(`url: ${formatFrontmatterValue(urlPath)}`);

  if (product) {
    frontmatter.push(`product: ${formatFrontmatterValue(product.name)}`);
    if (product.version) {
      frontmatter.push(`version: ${formatFrontmatterValue(product.version)}`);
    }
  }

  frontmatter.push('---');
  return frontmatter.join('\n');
}

/**
 * Generate enhanced frontmatter for section aggregation
 *
 * In addition to the single-page fields this emits `type: section`, the
 * number of child pages, a rough token estimate (4 characters per token over
 * the section content plus all child content) and a `child_pages` list.
 *
 * @param {Object} metadata - Object with title, description and optional content
 * @param {string} urlPath - URL path for the section
 * @param {Array} [childPages] - Array of child page objects with url, title
 *   and optional content; a missing value is treated as an empty list
 * @returns {string} YAML frontmatter as string
 */
function generateSectionFrontmatter(metadata, urlPath, childPages) {
  const pages = childPages ?? [];
  const product = detectProduct(urlPath);
  const frontmatter = ['---'];

  frontmatter.push(`title: ${formatFrontmatterValue(metadata.title)}`);
  if (metadata.description) {
    frontmatter.push(
      `description: ${formatFrontmatterValue(metadata.description)}`
    );
  }
  frontmatter.push(`url: ${formatFrontmatterValue(urlPath)}`);
  frontmatter.push(`type: section`);
  frontmatter.push(`pages: ${pages.length}`);

  // Add token estimate (rough: 4 chars per token)
  const contentLength = metadata.content?.length || 0;
  const childContentLength = pages.reduce(
    (sum, child) => sum + (child.content?.length || 0),
    0
  );
  const estimatedTokens = Math.ceil((contentLength + childContentLength) / 4);
  frontmatter.push(`estimated_tokens: ${estimatedTokens}`);

  if (product) {
    frontmatter.push(`product: ${formatFrontmatterValue(product.name)}`);
    if (product.version) {
      frontmatter.push(`version: ${formatFrontmatterValue(product.version)}`);
    }
  }

  // List child pages
  if (pages.length > 0) {
    frontmatter.push(`child_pages:`);
    for (const child of pages) {
      frontmatter.push(`  - url: ${formatFrontmatterValue(child.url)}`);
      frontmatter.push(`    title: ${formatFrontmatterValue(child.title)}`);
    }
  }

  frontmatter.push('---');
  return frontmatter.join('\n');
}

/**
 * Turn one rendered HTML page into a Markdown document with frontmatter.
 *
 * Steps: make sure product data is loaded, extract the article from the HTML,
 * run it through Turndown, tidy the result, and prepend the frontmatter
 * produced by generateFrontmatter().
 *
 * @param {string} htmlContent - Raw HTML content
 * @param {string} urlPath - URL path for the page (for frontmatter)
 * @returns {Promise<string|null>} Markdown content with frontmatter, or null
 *   when no article content could be extracted
 */
async function convertToMarkdown(htmlContent, urlPath) {
  await ensureProductDataInitialized();

  const converter = createTurndownService();
  const page = extractArticleContent(htmlContent, urlPath);
  if (!page) {
    return null;
  }

  const rawMarkdown = converter.turndown(page.content);

  // Tidy-up passes, applied in this exact order:
  // 1. collapse runs of three or more newlines into one blank line
  const collapsed = rawMarkdown.replace(/\n{3,}/g, '\n\n');
  // 2. drop pairs of adjacent horizontal-rule artifacts
  const withoutDoubleRules = collapsed.replace(
    /\* \* \*\s*\n\s*\* \* \*/g,
    ''
  );
  // 3. drop a horizontal rule left dangling at the very end
  const withoutTrailingRule = withoutDoubleRules.replace(/\* \* \*\s*$/g, '');
  const body = withoutTrailingRule.trim();

  const header = generateFrontmatter(page, urlPath);

  return [header, '\n\n', body, '\n'].join('');
}

/**
 * Tidy Markdown produced by Turndown: collapse runs of three or more newlines,
 * drop doubled and trailing `* * *` separator artifacts, and trim.
 *
 * NOTE(review): the newline collapse also applies inside fenced code blocks,
 * so code samples with several consecutive blank lines lose some of them.
 *
 * @param {string} markdown - Raw Turndown output
 * @returns {string} Cleaned Markdown
 */
function cleanSectionMarkdown(markdown) {
  return markdown
    .replace(/\n{3,}/g, '\n\n')
    .replace(/\* \* \*\s*\n\s*\* \* \*/g, '')
    .replace(/\* \* \*\s*$/g, '')
    .trim();
}

/**
 * Convert section HTML with child pages to aggregated Markdown
 *
 * The section page is converted first. Each child page that yields article
 * content is converted, has its leading H1 removed (the title is re-added as
 * an H2), and is appended after the section content, separated by `---`.
 * Child pages without article content are skipped.
 *
 * @param {string} sectionHtml - HTML content of the section index page
 * @param {string} sectionUrlPath - URL path for the section
 * @param {Array} [childHtmls] - Array of objects with {html, url} for each
 *   child page; a missing value is treated as an empty list
 * @returns {Promise<string|null>} Aggregated markdown content or null if the
 *   section page has no article content
 */
async function convertSectionToMarkdown(
  sectionHtml,
  sectionUrlPath,
  childHtmls
) {
  await ensureProductDataInitialized();

  const turndownService = createTurndownService();

  // Extract section metadata and content
  const sectionMetadata = extractArticleContent(sectionHtml, sectionUrlPath);
  if (!sectionMetadata) {
    return null;
  }

  // Convert section content to markdown
  const sectionMarkdown = cleanSectionMarkdown(
    turndownService.turndown(sectionMetadata.content)
  );

  // Process child pages
  const childContents = [];
  const childPageInfo = [];

  for (const { html, url } of childHtmls ?? []) {
    const childMetadata = extractArticleContent(html, url);
    if (!childMetadata) {
      continue;
    }

    // Remove the first h1 heading (page title) to avoid redundancy since the
    // title is added back as an h2. The `|$` alternative also covers a page
    // whose Markdown is nothing but the heading (no trailing newline).
    const childMarkdown = cleanSectionMarkdown(
      turndownService.turndown(childMetadata.content)
    ).replace(/^#\s+.+?(?:\n+|$)/, '');

    // Add child page title as heading; skip the blank separator when the page
    // has no body left.
    const heading = `## ${childMetadata.title}`;
    childContents.push(
      childMarkdown ? `${heading}\n\n${childMarkdown}` : heading
    );

    // Track child page info for frontmatter
    childPageInfo.push({
      url,
      title: childMetadata.title,
      content: childMarkdown,
    });
  }

  // Generate section frontmatter with child page info
  const frontmatter = generateSectionFrontmatter(
    { ...sectionMetadata, content: sectionMarkdown },
    sectionUrlPath,
    childPageInfo
  );

  // Combine section content with child pages
  const allContent = [sectionMarkdown, ...childContents].join('\n\n---\n\n');

  return `${frontmatter}\n\n${allContent}\n`;
}

// Public CommonJS API of this library.
// ensureProductDataInitialized() and getProductFromPath() are not exported;
// product lookup is exposed through detectProduct().
module.exports = {
  detectProduct,
  createTurndownService,
  extractArticleContent,
  generateFrontmatter,
  generateSectionFrontmatter,
  convertToMarkdown,
  convertSectionToMarkdown,
};