joplin/packages/lib/markdownUtils.ts

221 lines
6.5 KiB
TypeScript

import { validateLinks } from '@joplin/renderer';
const stringPadding = require('string-padding');
const urlUtils = require('./urlUtils');
const MarkdownIt = require('markdown-it');
// Taken from codemirror/addon/edit/continuelist.js
const listRegex = /^(\s*)([*+-] \[[x ]\]\s|[*+-]\s|(\d+)([.)]\s))(\s*)/;
const emptyListRegex = /^(\s*)([*+-] \[[x ]\]|[*+-]|(\d+)[.)])(\s+)$/;
export enum MarkdownTableJustify {
Left = 'left',
Center = 'center',
Right = 'right,',
}
export interface MarkdownTableHeader {
name: string;
label: string;
// eslint-disable-next-line @typescript-eslint/ban-types -- Old code before rule was applied
filter?: Function;
disableEscape?: boolean;
justify?: MarkdownTableJustify;
}
export interface MarkdownTableRow {
[key: string]: string;
}
const markdownUtils = {
// Titles for markdown links only need escaping for [ and ]
escapeTitleText(text: string) {
return text.replace(/(\[|\])/g, '\\$1');
},
escapeLinkUrl(url: string) {
url = url.replace(/\(/g, '%28');
url = url.replace(/\)/g, '%29');
url = url.replace(/ /g, '%20');
return url;
},
escapeTableCell(text: string) {
// Disable HTML code
text = text.replace(/</g, '&lt;');
text = text.replace(/>/g, '&gt;');
// Table cells can't contain new lines so replace with <br/>
text = text.replace(/\n/g, '<br/>');
// "|" is a reserved characters that should be escaped
text = text.replace(/\|/g, '\\|');
return text;
},
escapeInlineCode(text: string): string {
// https://github.com/github/markup/issues/363#issuecomment-55499909
return text.replace(/`/g, '``');
},
unescapeLinkUrl(url: string) {
url = url.replace(/%28/g, '(');
url = url.replace(/%29/g, ')');
url = url.replace(/%20/g, ' ');
return url;
},
prependBaseUrl(md: string, baseUrl: string) {
// eslint-disable-next-line no-useless-escape
return md.replace(/(\]\()([^\s\)]+)(.*?\))/g, (_match: any, before: string, url: string, after: string) => {
return before + urlUtils.prependBaseUrl(url, baseUrl) + after;
});
},
// Returns the **encoded** URLs, so to be useful they should be decoded again before use.
extractFileUrls(md: string, onlyType: string = null): string[] {
const markdownIt = new MarkdownIt();
markdownIt.validateLink = validateLinks; // Necessary to support file:/// links
const env = {};
const tokens = markdownIt.parse(md, env);
const output: string[] = [];
let linkType = onlyType;
if (linkType === 'pdf') linkType = 'link_open';
const searchUrls = (tokens: any[]) => {
for (let i = 0; i < tokens.length; i++) {
const token = tokens[i];
if ((!onlyType && (token.type === 'link_open' || token.type === 'image')) || (!!onlyType && token.type === onlyType) || (onlyType === 'pdf' && token.type === 'link_open')) {
// Pdf embeds are a special case, they are represented as 'link_open' tokens but are marked with 'embedded_pdf' as link name by the parser
// We are making sure if its in the proper pdf link format, only then we add it to the list
if (onlyType === 'pdf' && !(tokens.length > i + 1 && tokens[i + 1].type === 'text' && tokens[i + 1].content === 'embedded_pdf')) continue;
for (let j = 0; j < token.attrs.length; j++) {
const a = token.attrs[j];
if ((a[0] === 'src' || a[0] === 'href') && a.length >= 2 && a[1]) {
output.push(a[1]);
}
}
}
if (token.children && token.children.length) {
searchUrls(token.children);
}
}
};
searchUrls(tokens);
return output;
},
replaceResourceUrl(md: string, urlToReplace: string, id: string) {
const linkRegex = `(?<=\\]\\()\\<?${urlToReplace}\\>?(?=.*\\))`;
const reg = new RegExp(linkRegex, 'g');
return md.replace(reg, `:/${id}`);
},
extractImageUrls(md: string) {
return markdownUtils.extractFileUrls(md, 'image');
},
extractPdfUrls(md: string) {
return markdownUtils.extractFileUrls(md, 'pdf');
},
// The match results has 5 items
// Full match array is
// [Full match, whitespace, list token, ol line number, whitespace following token]
olLineNumber(line: string) {
const match = line.match(listRegex);
return match ? Number(match[3]) : 0;
},
extractListToken(line: string) {
const match = line.match(listRegex);
return match ? match[2] : '';
},
isListItem(line: string) {
return listRegex.test(line);
},
isEmptyListItem(line: string) {
return emptyListRegex.test(line);
},
createMarkdownTable(headers: MarkdownTableHeader[], rows: MarkdownTableRow[]): string {
const output = [];
const minCellWidth = 5;
const headersMd = [];
const lineMd = [];
for (let i = 0; i < headers.length; i++) {
const h = headers[i];
headersMd.push(stringPadding(h.label, minCellWidth, ' ', stringPadding.RIGHT));
const justify = h.justify ? h.justify : MarkdownTableJustify.Left;
if (justify === MarkdownTableJustify.Left) {
lineMd.push('-----');
} else if (justify === MarkdownTableJustify.Center) {
lineMd.push(':---:');
} else {
lineMd.push('----:');
}
}
output.push(`| ${headersMd.join(' | ')} |`);
output.push(`| ${lineMd.join(' | ')} |`);
for (let i = 0; i < rows.length; i++) {
const row = rows[i];
const rowMd = [];
for (let j = 0; j < headers.length; j++) {
const h = headers[j];
const value = (h.filter ? h.filter(row[h.name]) : row[h.name]) || '';
const valueMd = h.disableEscape ? value : markdownUtils.escapeTableCell(value);
rowMd.push(stringPadding(valueMd, minCellWidth, ' ', stringPadding.RIGHT));
}
output.push(`| ${rowMd.join(' | ')} |`);
}
return output.join('\n');
},
countTableColumns(line: string) {
if (!line) return 0;
const trimmed = line.trim();
let pipes = (line.match(/\|/g) || []).length;
if (trimmed[0] === '|') { pipes -= 1; }
if (trimmed[trimmed.length - 1] === '|') { pipes -= 1; }
return pipes + 1;
},
matchingTableDivider(header: string, divider: string) {
if (!header || !divider) return false;
const invalidChars = divider.match(/[^\s\-:|]/g);
if (invalidChars) { return false; }
const columns = markdownUtils.countTableColumns(header);
const cols = markdownUtils.countTableColumns(divider);
return cols > 0 && (cols >= columns);
},
titleFromBody(body: string) {
if (!body) return '';
const mdLinkRegex = /!?\[([^\]]+?)\]\(.+?\)/g;
const emptyMdLinkRegex = /!?\[\]\((.+?)\)/g;
const filterRegex = /^[# \n\t*`-]*/;
const lines = body.trim().split('\n');
const title = lines[0].trim();
return title.replace(filterRegex, '').replace(mdLinkRegex, '$1').replace(emptyMdLinkRegex, '$1').substring(0, 80);
},
};
export default markdownUtils;