/**
 * URL parsing utilities for documentation scaffolding.
 * Parses docs.influxdata.com URLs to extract product, version, and path information.
 */
|
|
|
|
import { basename } from 'path';
|
|
|
|
// Host name of the InfluxData documentation site
const DOCS_BASE_URL = 'docs.influxdata.com';

/**
 * Reduce a documentation URL (absolute, scheme-less, or path-only) to its path.
 * @param {string} url - Trimmed URL or path
 * @returns {string} Path portion without query string or fragment
 * @throws {TypeError} If the input mentions the docs host but is not a parseable URL
 */
function extractDocsPath(url) {
  if (!url.includes(DOCS_BASE_URL)) {
    // Path-only input: the URL class is not involved, so drop "?query" and
    // "#fragment" by hand to keep them out of the last path segment.
    return url.split(/[?#]/, 1)[0];
  }

  if (/^[a-z][a-z\d+.-]*:\/\//i.test(url)) {
    return new URL(url).pathname;
  }

  // "docs.influxdata.com/..." or "//docs.influxdata.com/...": new URL() rejects
  // input without a scheme, so supply one before parsing.
  const schemeless = url.replace(/^\/+/, '');
  if (schemeless.startsWith(DOCS_BASE_URL)) {
    return new URL(`https://${schemeless}`).pathname;
  }

  throw new TypeError(`Invalid URL: ${url}`);
}

/**
 * Split path parts into product, section, and page path for a namespace.
 * @param {string} namespace - First path component
 * @param {string[]} parts - All non-empty path components, namespace included
 * @returns {{product: ?string, section: ?string, pagePath: string[]}} Layout of the path
 */
function splitByNamespace(namespace, parts) {
  const rest = parts.slice(1);
  const unknown = { product: null, section: null, pagePath: [] };
  if (rest.length === 0) {
    return unknown;
  }

  // Shared layout: /{namespace}/{product-or-version}/{section}/{...path}
  const versioned = () => {
    const [product, section = null, ...pagePath] = rest;
    return { product, section, pagePath };
  };

  switch (namespace) {
    case 'influxdb3': // core, enterprise, cloud-dedicated, cloud-serverless, clustered, explorer
    case 'telegraf':
    case 'kapacitor':
    case 'chronograf':
      return versioned();
    case 'influxdb': {
      // /influxdb/cloud/... and /influxdb/v2/... (or v1.x) carry an explicit product
      if (rest[0] === 'cloud' || /^v\d/.test(rest[0])) {
        return versioned();
      }
      // Otherwise assume /influxdb/{section}/{...path} and default the product to v2
      return { product: 'v2', section: rest[0], pagePath: rest.slice(1) };
    }
    default:
      // Namespaces without a known layout keep only namespace and fullPath
      return unknown;
  }
}

/**
 * Parse a documentation URL to extract components
 * @param {string} url - Full URL, scheme-less URL, or path (e.g., "https://docs.influxdata.com/influxdb3/core/admin/databases/", "docs.influxdata.com/influxdb3/core/admin/databases/", or "/influxdb3/core/admin/databases/")
 * @returns {object} Parsed URL components: url (the input as given), namespace,
 *   product, section, pagePath (slash-joined string), isSection, fullPath
 * @throws {TypeError} If url is not a string or cannot be parsed as a URL
 * @throws {Error} If the URL has no path components
 */
export function parseDocumentationURL(url) {
  if (typeof url !== 'string') {
    throw new TypeError(
      `Invalid URL: expected a string but received ${typeof url}`
    );
  }

  // Surrounding whitespace would otherwise end up inside the first/last segment
  const path = extractDocsPath(url.trim());

  // Filtering empty parts also discards leading, trailing, and doubled slashes
  const parts = path.split('/').filter((p) => p.length > 0);

  if (parts.length === 0) {
    throw new Error('Invalid URL: no path components');
  }

  // First part is the namespace (influxdb3, influxdb, telegraf, etc.)
  const namespace = parts[0];
  const { product, section, pagePath } = splitByNamespace(namespace, parts);

  // A URL that stops at a section is that section's landing page; deeper URLs
  // count as sections unless the last part contains a dot (looks like a file).
  let isSection = false;
  if (pagePath.length === 0) {
    isSection = Boolean(section);
  } else {
    isSection = !pagePath[pagePath.length - 1].includes('.');
  }

  return {
    url,
    namespace,
    product,
    section,
    pagePath: pagePath.join('/'),
    isSection,
    fullPath: parts.join('/'),
  };
}
|
|
|
|
/**
 * Check whether a URL can be parsed as a documentation URL
 * @param {string} url - URL to validate
 * @returns {boolean} True when parsing succeeds and yields a non-empty
 *   namespace; false when parsing throws
 */
export function validateDocumentationURL(url) {
  let namespace;

  try {
    ({ namespace } = parseDocumentationURL(url));
  } catch (parseError) {
    // Any parse failure simply means "not a valid documentation URL"
    return false;
  }

  return namespace && namespace.length > 0;
}
|
|
|
|
/**
 * Build the candidate content file paths for a parsed URL
 * @param {object} parsedURL - Parsed URL from parseDocumentationURL()
 * @returns {string[]} Two candidate paths under content/, most likely first
 */
export function urlToFilePaths(parsedURL) {
  const { namespace, product, section, pagePath, isSection } = parsedURL;

  // The namespace is always present; the remaining segments are appended only
  // when they are set.
  const contentPath = [`content/${namespace}`, product, section, pagePath]
    .filter(Boolean)
    .join('/');

  const indexFile = `${contentPath}/_index.md`;
  const pageFile = `${contentPath}.md`;

  // Both candidates are always returned: a section may be a single file and a
  // page may turn out to be a directory. isSection only decides the order.
  return isSection ? [indexFile, pageFile] : [pageFile, indexFile];
}
|
|
|
|
/**
 * Suggest a file name for a parsed URL
 * @param {object} parsedURL - Parsed URL from parseDocumentationURL()
 * @returns {string} Last segment of the page path, else the section name,
 *   else "index"
 */
export function urlToFileName(parsedURL) {
  const { pagePath, section } = parsedURL;
  const hasPagePath = Boolean(pagePath) && pagePath.length > 0;

  if (!hasPagePath) {
    // No page below the section: name the file after the section if there is one
    return section || 'index';
  }

  // The deepest path segment names the page
  return pagePath.split('/').pop();
}
|
|
|
|
/**
 * Parse several URLs given as a comma-separated string or as an array
 * @param {string|string[]} urls - URLs to parse
 * @returns {object[]} Parsed URLs, in input order; entries that fail to parse
 *   are reported with console.error and left out
 * @throws {Error} If urls is neither a string nor an array
 */
export function parseMultipleURLs(urls) {
  const isString = typeof urls === 'string';

  if (!isString && !Array.isArray(urls)) {
    throw new Error('URLs must be a string or array');
  }

  // String input is split on commas and trimmed; arrays are used as given
  const candidates = isString
    ? urls.split(',').map((entry) => entry.trim())
    : urls;

  const parsedURLs = [];

  candidates.forEach((candidate) => {
    let parsed;
    try {
      parsed = parseDocumentationURL(candidate);
    } catch (error) {
      // Report the bad entry and keep going with the rest
      console.error(`Error parsing URL ${candidate}: ${error.message}`);
      return;
    }
    if (parsed !== null) {
      parsedURLs.push(parsed);
    }
  });

  return parsedURLs;
}
|