// docs-v2/scripts/lib/url-parser.js

/**
 * URL parsing utilities for documentation scaffolding
 * Parses docs.influxdata.com URLs to extract product, version, and path information
 */

import { basename } from 'path';

// Host name of the InfluxData documentation site
const DOCS_BASE_URL = 'docs.influxdata.com';

/**
 * Reduce a URL or path string to its pathname (no host, query string, or fragment).
 *
 * Handles three input shapes:
 *  - absolute URLs with a scheme ("https://host/path?x#y")
 *  - the docs host written without a scheme ("docs.influxdata.com/path")
 *  - bare paths ("/path?x#y" or "path")
 *
 * @param {string} url - URL or path to reduce
 * @returns {string} Pathname portion; may still carry leading/trailing slashes
 * @throws {TypeError} If the input looks like an absolute URL but cannot be parsed
 */
function extractPathname(url) {
  const trimmed = url.trim();

  // Absolute URL with an explicit scheme: let the URL parser drop the host,
  // query string, and fragment.
  if (/^[a-z][a-z\d+.-]*:\/\//i.test(trimmed)) {
    return new URL(trimmed).pathname;
  }

  // Docs host written without a scheme. `new URL` rejects such input, so a
  // scheme is supplied before parsing. The character after the host must end
  // the host name, otherwise this is just a path that happens to start with it.
  const startsWithHost = trimmed.toLowerCase().startsWith(DOCS_BASE_URL);
  const afterHost = trimmed.slice(DOCS_BASE_URL.length);
  if (startsWithHost && (afterHost === '' || /^[/?#:]/.test(afterHost))) {
    return new URL(`https://${trimmed}`).pathname;
  }

  // Bare path: cut off any query string or fragment by hand.
  return trimmed.split(/[?#]/, 1)[0];
}

/**
 * Parse a documentation URL to extract components.
 *
 * Recognized layouts:
 *  - /influxdb3/{product}/{section}/{...path}
 *  - /influxdb/{cloud|vN...}/{section}/{...path}
 *  - /influxdb/{section}/{...path}            (no version segment; product defaults to "v2")
 *  - /telegraf|kapacitor|chronograf/{version}/{section}/{...path}
 *
 * For any other namespace, `product` and `section` stay null and `pagePath`
 * stays empty; the complete path is still available as `fullPath`.
 *
 * @param {string} url - Full URL or path (e.g., "https://docs.influxdata.com/influxdb3/core/admin/databases/" or "/influxdb3/core/admin/databases/")
 * @returns {{url: string, namespace: string, product: (string|null), section: (string|null), pagePath: string, isSection: boolean, fullPath: string}}
 *   Parsed URL components. `url` is the input as given, `pagePath` is the
 *   slash-joined remainder after the section, and `fullPath` is the whole
 *   path without leading/trailing slashes.
 * @throws {TypeError} If `url` is not a string or is a malformed absolute URL
 * @throws {Error} If the URL has no path components
 */
export function parseDocumentationURL(url) {
  if (typeof url !== 'string') {
    throw new TypeError('Invalid URL: expected a string');
  }

  // Splitting and dropping empty segments also removes leading, trailing,
  // and doubled slashes.
  const parts = extractPathname(url)
    .split('/')
    .filter((part) => part.length > 0);

  if (parts.length === 0) {
    throw new Error('Invalid URL: no path components');
  }

  // First part is the namespace (influxdb3, influxdb, telegraf, etc.)
  const namespace = parts[0];
  const second = parts[1];
  const hasSecond = parts.length >= 2;

  let product = null;
  let section = null;
  let pagePath = [];

  // Namespaces whose second segment is a product or version identifier.
  // For "influxdb" that only holds when the segment is "cloud" or starts
  // with "v" followed by a digit (v2, v1, v2.7, ...).
  const usesProductSegment =
    ['influxdb3', 'telegraf', 'kapacitor', 'chronograf'].includes(namespace) ||
    (namespace === 'influxdb' &&
      hasSecond &&
      (second === 'cloud' || /^v\d/.test(second)));

  if (hasSecond && usesProductSegment) {
    product = second;
    section = parts.length >= 3 ? parts[2] : null;
    pagePath = parts.slice(3);
  } else if (hasSecond && namespace === 'influxdb') {
    // /influxdb/{section}/{...path} with no version segment: default to v2
    product = 'v2';
    section = second;
    pagePath = parts.slice(2);
  }

  // A URL is treated as a section (directory) when it stops at the section
  // itself, or when its last path segment has no dot (no file extension).
  let isSection = false;
  if (pagePath.length === 0) {
    isSection = Boolean(section);
  } else {
    isSection = !pagePath[pagePath.length - 1].includes('.');
  }

  return {
    url,
    namespace,
    product,
    section,
    pagePath: pagePath.join('/'),
    isSection,
    fullPath: parts.join('/'),
  };
}

/**
 * Check whether a URL can be parsed as a documentation URL.
 *
 * Any exception raised while parsing is treated as "not valid" rather than
 * being propagated to the caller.
 *
 * @param {string} url - URL to validate
 * @returns {boolean} True when parsing succeeds and yields a non-empty namespace
 */
export function validateDocumentationURL(url) {
  let namespace;

  try {
    ({ namespace } = parseDocumentationURL(url));
  } catch (parseError) {
    // Parsing failed, so the URL is not usable
    return false;
  }

  return namespace && namespace.length > 0;
}

/**
 * Build the candidate content file paths for a parsed URL.
 *
 * The path is "content/{namespace}" followed by whichever of product,
 * section, and pagePath are set. Two candidates are always returned: the
 * directory form ("<path>/_index.md") and the single-file form ("<path>.md").
 * The likelier one comes first: directory form for sections, single-file
 * form otherwise.
 *
 * @param {object} parsedURL - Parsed URL from parseDocumentationURL()
 * @returns {string[]} Array of potential file paths to check, most likely first
 */
export function urlToFilePaths(parsedURL) {
  const { namespace, product, section, pagePath, isSection } = parsedURL;

  // The namespace is always included; the remaining pieces only when set
  const contentPath = [`content/${namespace}`, product, section, pagePath]
    .filter(Boolean)
    .join('/');

  const indexFile = `${contentPath}/_index.md`;
  const pageFile = `${contentPath}.md`;

  return isSection ? [indexFile, pageFile] : [pageFile, indexFile];
}

/**
 * Suggest a file name for a parsed URL.
 *
 * Preference order: the last segment of the page path, then the section
 * name, then the literal "index".
 *
 * @param {object} parsedURL - Parsed URL from parseDocumentationURL()
 * @returns {string} Suggested file name
 */
export function urlToFileName(parsedURL) {
  const { pagePath, section } = parsedURL;

  // No page path: fall back to the section name, then to a generic default
  if (!pagePath || !(pagePath.length > 0)) {
    return section ? section : 'index';
  }

  // Last segment of the slash-separated page path
  return pagePath.split('/').pop();
}

/**
 * Parse several URLs at once.
 *
 * A string is split on commas and each piece is trimmed; an array is used
 * as given. URLs that fail to parse are reported on stderr and left out of
 * the result instead of aborting the whole batch.
 *
 * @param {string|string[]} urls - URLs to parse
 * @returns {object[]} Array of parsed URLs (failed entries omitted)
 * @throws {Error} If `urls` is neither a string nor an array
 */
export function parseMultipleURLs(urls) {
  const isString = typeof urls === 'string';

  if (!isString && !Array.isArray(urls)) {
    throw new Error('URLs must be a string or array');
  }

  const candidates = isString
    ? urls.split(',').map((entry) => entry.trim())
    : urls;

  const results = [];

  candidates.forEach((candidate) => {
    let parsed;

    try {
      parsed = parseDocumentationURL(candidate);
    } catch (error) {
      // Report the failure and move on to the next URL
      console.error(`Error parsing URL ${candidate}: ${error.message}`);
      return;
    }

    if (parsed !== null) {
      results.push(parsed);
    }
  });

  return results;
}