docs-v2/scripts/lib/url-parser.js

217 lines
6.0 KiB
JavaScript

/**
* URL parsing utilities for documentation scaffolding
* Parses docs.influxdata.com URLs to extract product, version, and path information
*/
import { basename } from 'path';
// Host name of the InfluxData documentation site
const DOCS_BASE_URL = 'docs.influxdata.com';

// Matches input that starts with a URL scheme such as "https://"
const URL_SCHEME_PATTERN = /^[a-z][a-z0-9+.-]*:\/\//i;

// Namespaces whose paths are laid out as /{namespace}/{product-or-version}/{section}/{...path}
const PRODUCT_FIRST_NAMESPACES = [
  'influxdb3',
  'telegraf',
  'kapacitor',
  'chronograf',
];

/**
 * Reduce a full URL, a scheme-less docs URL, or a bare path to its path portion.
 * @param {string} url - Raw user input
 * @returns {string} Path without host, query string, or fragment
 * @throws {TypeError} If the input looks like an absolute URL but cannot be parsed
 */
function extractPathname(url) {
  const trimmed = url.trim();

  // Absolute URL on any host (production docs, local preview, staging, ...)
  if (URL_SCHEME_PATTERN.test(trimmed)) {
    return new URL(trimmed).pathname;
  }

  // Scheme-less or protocol-relative docs URL: the URL constructor needs a
  // scheme, so add one rather than letting it throw
  const withoutLeadingSlashes = trimmed.replace(/^\/+/, '');
  if (withoutLeadingSlashes.startsWith(DOCS_BASE_URL)) {
    return new URL(`https://${withoutLeadingSlashes}`).pathname;
  }

  // Path-only input: keep everything before the first "?" or "#"
  return trimmed.split(/[?#]/, 1)[0];
}

/**
 * Split the path components that follow the namespace into product, section,
 * and the remaining page path.
 * @param {string} namespace - First path component (influxdb3, influxdb, telegraf, ...)
 * @param {string[]} rest - Path components after the namespace
 * @returns {{product: (string|null), section: (string|null), pagePath: string[]}}
 */
function splitProductPath(namespace, rest) {
  const unresolved = { product: null, section: null, pagePath: [] };
  if (rest.length === 0) {
    return unresolved;
  }

  const [first, second = null] = rest;
  const productFirst = {
    product: first,
    section: second,
    pagePath: rest.slice(2),
  };

  if (PRODUCT_FIRST_NAMESPACES.includes(namespace)) {
    return productFirst;
  }

  if (namespace === 'influxdb') {
    // /influxdb/cloud/... and /influxdb/v2/... (or v1...) name the product explicitly
    if (first === 'cloud' || /^v\d/.test(first)) {
      return productFirst;
    }
    // Otherwise the path is /influxdb/{section}/{...path}; default the product to v2
    return { product: 'v2', section: first, pagePath: rest.slice(1) };
  }

  // Unrecognized namespace: nothing beyond the namespace is interpreted
  return unresolved;
}

/**
 * Parse a documentation URL to extract components
 *
 * Accepts a full URL on any host, a scheme-less "docs.influxdata.com/..." URL,
 * or a bare path. Query strings and fragments are ignored.
 *
 * @param {string} url - Full URL or path (e.g., "https://docs.influxdata.com/influxdb3/core/admin/databases/" or "/influxdb3/core/admin/databases/")
 * @returns {object} Parsed URL components:
 *   - url: the input exactly as given
 *   - namespace: first path component
 *   - product: product or version component, or null when not determined
 *   - section: section component, or null when not determined
 *   - pagePath: remaining components joined with "/" ("" when none)
 *   - isSection: true when the URL points at a section rather than a single file
 *   - fullPath: all path components joined with "/"
 * @throws {TypeError} If url is not a string or is a malformed absolute URL
 * @throws {Error} If the URL contains no path components
 */
export function parseDocumentationURL(url) {
  if (typeof url !== 'string') {
    throw new TypeError('URL must be a string');
  }

  // Filtering empty parts also discards leading, trailing, and doubled slashes
  const parts = extractPathname(url)
    .split('/')
    .filter((p) => p.length > 0);
  if (parts.length === 0) {
    throw new Error('Invalid URL: no path components');
  }

  const namespace = parts[0];
  const { product, section, pagePath } = splitProductPath(
    namespace,
    parts.slice(1)
  );

  // With no page path, a known section means a section landing page.
  // Otherwise a final component without a dot is treated as a directory.
  const isSection =
    pagePath.length > 0
      ? !pagePath[pagePath.length - 1].includes('.')
      : section !== null;

  return {
    url,
    namespace,
    product,
    section,
    pagePath: pagePath.join('/'),
    isSection,
    fullPath: parts.join('/'),
  };
}
/**
 * Check whether a URL can be parsed as a documentation URL
 *
 * Any error raised while parsing is treated as "not valid" rather than
 * propagated to the caller.
 *
 * @param {string} url - URL to validate
 * @returns {boolean} True if the URL parses and yields a non-empty namespace
 */
export function validateDocumentationURL(url) {
  try {
    const { namespace } = parseDocumentationURL(url);
    return namespace && namespace.length > 0;
  } catch (error) {
    // Parsing failed, so the URL is not usable
    return false;
  }
}
/**
 * Build the candidate content file paths for a parsed URL
 *
 * Two candidates are always returned: "<path>/_index.md" and "<path>.md".
 * The order reflects which one is more likely given `isSection`.
 *
 * @param {object} parsedURL - Parsed URL from parseDocumentationURL()
 * @returns {string[]} Array of potential file paths to check, most likely first
 */
export function urlToFilePaths(parsedURL) {
  const { namespace, product, section, pagePath, isSection } = parsedURL;

  // Append each component that is present, in order, beneath content/{namespace}
  const contentPath = [product, section, pagePath]
    .filter(Boolean)
    .reduce((prefix, segment) => `${prefix}/${segment}`, `content/${namespace}`);

  const indexFile = `${contentPath}/_index.md`;
  const pageFile = `${contentPath}.md`;

  // Sections usually live in a directory with _index.md; single pages in a .md
  // file. The other form is kept as a fallback in both cases.
  return isSection ? [indexFile, pageFile] : [pageFile, indexFile];
}
/**
 * Suggest a file name for the page a parsed URL points to
 *
 * Preference order: the final component of the page path, then the section
 * name, then the literal "index".
 *
 * @param {object} parsedURL - Parsed URL from parseDocumentationURL()
 * @returns {string} Suggested file name
 */
export function urlToFileName(parsedURL) {
  const { pagePath, section } = parsedURL;

  const hasPagePath = Boolean(pagePath) && pagePath.length > 0;
  if (hasPagePath) {
    // Final "/"-separated component of the page path
    return pagePath.split('/').pop();
  }

  if (section) {
    return section;
  }

  return 'index';
}
/**
 * Parse several URLs at once
 *
 * A string is split on commas and each piece is trimmed; an array is used as
 * given. URLs that fail to parse are reported with console.error and left out
 * of the result.
 *
 * @param {string|string[]} urls - Comma-separated string or array of URLs
 * @returns {object[]} Array of parsed URLs, in input order
 * @throws {Error} If urls is neither a string nor an array
 */
export function parseMultipleURLs(urls) {
  const isList = Array.isArray(urls);
  if (typeof urls !== 'string' && !isList) {
    throw new Error('URLs must be a string or array');
  }

  const candidates = isList
    ? urls
    : urls.split(',').map((piece) => piece.trim());

  return candidates.reduce((parsedList, candidate) => {
    try {
      const parsed = parseDocumentationURL(candidate);
      if (parsed !== null) {
        parsedList.push(parsed);
      }
    } catch (error) {
      // Report the failure and carry on with the remaining URLs
      console.error(`Error parsing URL ${candidate}: ${error.message}`);
    }
    return parsedList;
  }, []);
}