const stringPadding = require('string-padding');
const stringToStream = require('string-to-stream');

// Placeholder tokens stored as elements of the `md` arrays handled below.
// processMdArrayNewLines() turns them into newlines and spaces when it builds the final string.
const BLOCK_OPEN = "[[BLOCK_OPEN]]";
const BLOCK_CLOSE = "[[BLOCK_CLOSE]]";
const NEWLINE = "[[NEWLINE]]";
const NEWLINE_MERGED = "[[MERGED]]";
const SPACE = "[[SPACE]]";

// For monospace font detection (Courier, Menlo, Monaco)
const MONOSPACE_OPEN = "[[MONOSPACE_OPEN]]";
const MONOSPACE_CLOSE = "[[MONOSPACE_CLOSE]]";

/**
 * Returns a list of all monospace sections with flags saying whether they can be merged or not.
 *
 * The input array is copied element by element; an empty string is inserted right after every
 * MONOSPACE_OPEN and right before every MONOSPACE_CLOSE so that mergeMonospaceSections() can
 * later replace those slots with a NEWLINE if needed.
 *
 * @param {string[]} md - Array of text fragments and marker tokens.
 * @returns {{md: string[], monospaceSections: Object[]}} The copied array and one descriptor per
 *   closed section: `openIndex` / `closeIndex` (positions of the tags in the returned array),
 *   `mergeAllowed`, `mergeWithPrevious` and `isEmptyLine`.
 * @throws {Error} If a section is opened twice or closed without being open.
 */
function findMonospaceSections(md) {
	const temp = [];
	const sections = [];
	let section = null;

	// This variable is used twice: to detect if only newlines/block markers sit between two monospace
	// sections, and to detect if only newlines/block markers sit inside a monospace section.
	let mergeWithPrevious = true;

	let last = "";
	for (let i = 0; i < md.length; i++) {
		const v = md[i];

		if (v === MONOSPACE_OPEN) {
			if (section != null) throw new Error('Monospace open tag detected while the previous was not closed'); // Sanity check, but normally not possible

			section = {
				openIndex: null,
				closeIndex: null,
				mergeAllowed: true,
				mergeWithPrevious: mergeWithPrevious,
				isEmptyLine: false,
			};

			// Remember where monospace section begins, later it will be replaced with appropriate markdown (` or ```)
			section.openIndex = temp.push(v) - 1;
			// Add an empty string, it can be later replaced with newline if necessary
			temp.push("");

			if (last !== BLOCK_OPEN) {
				// We cannot merge inline code
				section.mergeAllowed = false;
			}

			// Reset to detect if monospace section contains anything other than newlines/block markers
			mergeWithPrevious = true;
		} else if (v === MONOSPACE_CLOSE) {
			if (section == null) throw new Error('Monospace tag was closed without being open before'); // Sanity check, but normally not possible
			if (section.closeIndex != null) throw new Error('Monospace tag is closed for the second time'); // Sanity check, but normally not possible

			// Add an empty string, it can be later replaced with newline if necessary
			temp.push("");
			// Remember where monospace section ends, later it will be replaced with appropriate markdown (` or ```)
			section.closeIndex = temp.push(v) - 1;

			if (md[i + 1] !== BLOCK_CLOSE) {
				// We cannot merge inline code
				section.mergeAllowed = false;
			}

			// Still true here means the section held nothing but newlines/block markers
			section.isEmptyLine = mergeWithPrevious;
			sections.push(section);

			// Reset
			section = null;
			mergeWithPrevious = true;
		} else {
			// We can merge only if monospace sections are separated by newlines
			if (v !== NEWLINE && v !== BLOCK_OPEN && v !== BLOCK_CLOSE) {
				mergeWithPrevious = false;
			}
			temp.push(v);
		}

		last = v;
	}

	return {
		md: temp,
		monospaceSections: sections,
	};
}

/**
 * Loops over monospace sections and merges what it can merge.
 *
 * Runs of adjacent mergeable sections become one fenced block (``` ... ```); every other section
 * becomes inline code (` ... `), or disappears when it only contained newlines.
 *
 * @param {string[]} md - Array returned by findMonospaceSections(); the tag slots are rewritten in place.
 * @param {Object[]} sections - Section descriptors from findMonospaceSections(); consumed (emptied) by this call.
 */
function mergeMonospaceSections(md, sections) {
	const USE_BLOCK_TAG = 1;
	const USE_INLINE_TAG = 2;
	const USE_EMPTY_TAG = 3;

	// Writes the requested opening/closing markup into the slots reserved for `section`.
	const toMonospace = (md, section, startTag, endTag) => {
		// It looks better when empty lines are not inlined
		const emptyInline = startTag === USE_INLINE_TAG && section.isEmptyLine;
		const open = emptyInline ? USE_EMPTY_TAG : startTag;
		const close = emptyInline ? USE_EMPTY_TAG : endTag;

		switch (open) {
		case USE_BLOCK_TAG:
			md[section.openIndex] = "```";
			md[section.openIndex + 1] = NEWLINE;
			break;
		case USE_INLINE_TAG:
			md[section.openIndex] = "`";
			break;
		case USE_EMPTY_TAG:
			md[section.openIndex] = "";
			break;
		}

		switch (close) {
		case USE_BLOCK_TAG:
			// We don't add a NEWLINE if there already is a NEWLINE
			if (md[section.closeIndex - 2] === NEWLINE) {
				md[section.closeIndex - 1] = "";
			} else {
				md[section.closeIndex - 1] = NEWLINE;
			}
			md[section.closeIndex] = "```";
			break;
		case USE_INLINE_TAG:
			md[section.closeIndex] = "`";
			break;
		case USE_EMPTY_TAG:
			md[section.closeIndex] = "";
			break;
		}
	};

	const getSection = () => {
		return sections.shift();
	};

	// Returns the next section that may start a merge; everything skipped on the way is inlined.
	const getMergeableSection = (first = null) => {
		if (first) {
			sections.unshift(first);
		}

		while (sections.length) {
			// Declared locally: the original assigned to an undeclared `s`, which leaks a global
			// in sloppy mode and throws a ReferenceError in strict mode.
			const s = sections.shift();
			if (s.mergeAllowed) {
				return s;
			}
			// If cannot merge then convert into inline code
			toMonospace(md, s, USE_INLINE_TAG, USE_INLINE_TAG);
		}

		return null;
	};

	let left = getMergeableSection();
	let right = null;

	while (left) {
		let isFirst = true;

		right = getSection();
		while (right && right.mergeAllowed && right.mergeWithPrevious) {
			// We can merge left and right
			if (isFirst) {
				isFirst = false;
				// First section
				toMonospace(md, left, USE_BLOCK_TAG, USE_EMPTY_TAG);
			} else {
				// Middle section
				toMonospace(md, left, USE_EMPTY_TAG, USE_EMPTY_TAG);
			}
			left = right;
			right = getSection();
		}

		if (isFirst) {
			// Could not merge, convert to inline code
			toMonospace(md, left, USE_INLINE_TAG, USE_INLINE_TAG);
		} else {
			// Was merged, add block end tag
			toMonospace(md, left, USE_EMPTY_TAG, USE_BLOCK_TAG);
		}

		left = getMergeableSection(right);
	}
}

/**
 * Tries to merge monospace sections. It works in two phases:
 * 1) It finds all monospace sections
 * 2) It merges all monospace sections where merge is allowed
 *
 * @param {string[]} md - Array of text fragments and marker tokens.
 * @param {boolean} [ignoreMonospace=false] - When true, no merging is done and the monospace tags are simply dropped.
 * @returns {string[]} A new array without empty items.
 */
function mergeMonospaceSectionsWrapper(md, ignoreMonospace = false) {
	let items = md;

	if (!ignoreMonospace) {
		const result = findMonospaceSections(md);
		if (result.monospaceSections.length > 0) {
			mergeMonospaceSections(result.md, result.monospaceSections);
		}
		items = result.md;
	}

	// Remove empty items, it is necessary for correct function of newline merging happening outside this function
	return items.filter((v) => {
		if (ignoreMonospace && (v === MONOSPACE_OPEN || v === MONOSPACE_CLOSE)) return false;
		return v !== "";
	});
}

/**
 * Converts the token array into the final Markdown string: block markers and newlines are
 * normalised, SPACE tokens are rendered, then the layout is adjusted by formatMdLayout() and
 * runs of blank lines are reduced to a single blank line.
 *
 * Leading BLOCK_OPEN and trailing BLOCK_CLOSE tokens are removed from `md` in place.
 *
 * @param {string[]} md - Array of text fragments and marker tokens.
 * @param {boolean} [isTable=false] - Only referenced by the disabled monospace-merging call below.
 * @returns {string} The Markdown text, or '' when the content is only white space.
 */
function processMdArrayNewLines(md, isTable = false) {
	// console.info(md);

	// Try to merge MONOSPACE sections, works well when not parsing a table
	// md = mergeMonospaceSectionsWrapper(md, isTable);

	while (md.length && md[0] === BLOCK_OPEN) {
		md.shift();
	}

	while (md.length && md[md.length - 1] === BLOCK_CLOSE) {
		md.pop();
	}

	// Copies `items`, leaving out every element for which shouldDrop(previousItem, item) is true.
	// `previousItem` is the previous element of `items`, whether or not it was kept.
	const dropWhere = (items, shouldDrop) => {
		const kept = [];
		let previousItem = '';
		for (const v of items) {
			if (!shouldDrop(previousItem, v)) kept.push(v);
			previousItem = v;
		}
		return kept;
	};

	// Collapse runs of the same block marker into one
	md = dropWhere(md, (last, v) => isNewLineBlock(last) && isNewLineBlock(v) && last === v);

	// A BLOCK_CLOSE directly followed by a BLOCK_OPEN becomes a single NEWLINE_MERGED
	let temp = [];
	let last = "";
	for (const v of md) {
		if (last === BLOCK_CLOSE && v === BLOCK_OPEN) {
			temp.pop();
			temp.push(NEWLINE_MERGED);
		} else {
			temp.push(v);
		}
		last = v;
	}
	md = temp;

	// Drop NEWLINE_MERGED / BLOCK_CLOSE that directly follow a NEWLINE
	md = dropWhere(md, (last, v) => last === NEWLINE && (v === NEWLINE_MERGED || v === BLOCK_CLOSE));

	// Same for NEWLINE_MERGED / BLOCK_OPEN that directly follow a NEWLINE
	md = dropWhere(md, (last, v) => last === NEWLINE && (v === NEWLINE_MERGED || v === BLOCK_OPEN));

	if (md.length > 2) {
		if (md[md.length - 2] === NEWLINE_MERGED && md[md.length - 1] === NEWLINE) {
			md.pop();
		}
	}

	// console.info(md);

	let output = '';
	let previous = '';
	let start = true;
	for (const v of md) {
		let add = '';
		if (v === BLOCK_CLOSE || v === BLOCK_OPEN || v === NEWLINE || v === NEWLINE_MERGED) {
			add = "\n";
		} else if (v === SPACE) {
			// NOTE(review): `previous` holds the rendered text (" " or "\n"), never the SPACE token,
			// so the first comparison cannot be true and consecutive SPACE tokens are all rendered.
			// Left unchanged - confirm whether `previous === " "` was intended.
			if (previous === SPACE || previous === "\n" || start) {
				continue; // skip
			} else {
				add = " ";
			}
		} else {
			add = v;
		}
		start = false;
		output += add;
		previous = add;
	}

	if (!output.trim().length) return '';

	// To simplify the result, we only allow up to one empty line between blocks of text
	const mergeMultipleNewLines = function(lines) {
		const merged = [];
		let newlineCount = 0;
		for (const line of lines) {
			if (!line.trim()) {
				newlineCount++;
			} else {
				newlineCount = 0;
			}

			if (newlineCount >= 2) continue;

			merged.push(line);
		}
		return merged;
	};

	// Strip carriage returns. The original pattern /\\r/g matched a literal backslash followed
	// by "r", so it removed the two-character text "\r" from notes and left real CRs in place.
	let lines = output.replace(/\r/g, '').split('\n');
	lines = formatMdLayout(lines);
	// lines = convertSingleLineCodeBlocksToInline(lines)
	lines = mergeMultipleNewLines(lines);
	return lines.join('\n');
}

// While the processMdArrayNewLines() function adds newlines in a way that's technically correct, the resulting Markdown can look messy.
// This is because while a "block" element should be surrounded by newlines, in practice, some should be surrounded by TWO new lines, while
// others by only ONE.
//
// For instance, this:
//
//     <li>one</li>
//     <li>two</li>
//     <li>three</li>
//
// should result in this:
//
//     - one
//     - two
//     - three
//
// While this:
//
//     <p>Some long paragraph</p>
//     <p>And another one</p>
//     <p>And the last paragraph</p>
    //
// should result in this:
//
//     Some long paragraph
//
//     And another one
//
//     And the last paragraph
//
// So in one case, one newline between tags, and in another two newlines. In HTML this would be done via CSS, but in Markdown we need
// to add new lines. It's also important to get these newlines right because two blocks of text next to each other might be rendered
// differently than if there's a newline between them. So the function below parses the almost final MD and adds new lines depending
// on various rules.

// The line predicates below always return a real boolean and accept empty/null/undefined lines
// (previously some returned '' or undefined, and isHeading/isTableLine threw on null/undefined).

/** True if the line starts with one or more "#" followed by a white space character. */
const isHeading = function(line) {
	return !!line && /^#+\s/.test(line);
};

/** True if the line, once trimmed, starts with "- ". */
const isListItem = function(line) {
	return !!line && line.trim().startsWith('- ');
};

/** True if the line starts with a TAB character. */
const isCodeLine = function(line) {
	return !!line && line.startsWith('\t');
};

/** True if the line starts with "| ". */
const isTableLine = function(line) {
	return !!line && line.startsWith('| ');
};

/** True if the line is at least 80 characters long and is not a list item, heading, code line or table line. */
const isPlainParagraph = function(line) {
	// Note: if a line is shorter than 80 characters, we don't consider it's a paragraph, which
	// means no newlines will be added before or after. This is to handle text that has been
	// written with "hard" new lines.
	if (!line || line.length < 80) return false;

	return !(isListItem(line) || isHeading(line) || isCodeLine(line) || isTableLine(line));
};

/**
 * Inserts an empty line wherever two consecutive lines belong to different kinds of content.
 *
 * @param {string[]} lines - Lines of the almost final Markdown.
 * @returns {string[]} A new array with the extra empty lines added.
 */
function formatMdLayout(lines) {
	let previous = '';
	const newLines = [];

	for (const line of lines) {
		const needsEmptyLine =
			// End of a list of items
			(isListItem(previous) && line && !isListItem(line)) ||
			// Beginning of a list of items
			(isListItem(line) && previous && !isListItem(previous)) ||
			// Before a heading
			(isHeading(line) && previous) ||
			// After a heading
			(isHeading(previous) && line) ||
			// Entering or leaving a run of code lines
			(isCodeLine(line) !== isCodeLine(previous)) ||
			// Entering or leaving a table
			(isTableLine(line) !== isTableLine(previous)) ||
			// Beginning of a paragraph
			(isPlainParagraph(line) && previous) ||
			// End of a paragraph
			(isPlainParagraph(previous) && line);

		if (needsEmptyLine) newLines.push('');

		newLines.push(line);
		previous = line;
	}

	return newLines;
}

/** True if the first character of the line is one of: space , . ; : ) ] } */
function lineStartsWithDelimiter(line) {
	if (!line || !line.length) return false;
	return ' ,.;:)]}'.includes(line[0]);
}

// function convertSingleLineCodeBlocksToInline(lines) {
// 	let newLines = [];
// 	let currentCodeLines = [];
// 	let codeLineCount = 0;

// 	const processCurrentCodeLines = (line) => {
// 		if (codeLineCount === 1) {
// 			const inlineCode = currentCodeLines.join('').trim();
// 			newLines[newLines.length - 1] += '`' + inlineCode + '`';
// 			if (line) newLines[newLines.length - 1] += (lineStartsWithDelimiter(line) ? '' : ' ') + line;
// 		} else {
// 			newLines = newLines.concat(currentCodeLines);
// 			newLines.push(line);
// 		}

// 		currentCodeLines = [];
// 		codeLineCount = 0;
// 	}

// 	for (let i = 0; i < lines.length; i++) {
// 		const line = lines[i];

// 		if (isCodeLine(line)) {
// 			currentCodeLines.push(line);
// 			codeLineCount++;
// 		} else if (!line.trim()) {
// 			currentCodeLines.push(line);
// 		} else {
// 			if (currentCodeLines.length) {
// 				processCurrentCodeLines(line);
// 			} else {
// 				newLines.push(line);
// 			}
// 		}
// 	}

// 	if (currentCodeLines.length) processCurrentCodeLines('');

// 	return newLines;
// }

/** True for \n, \r, \v, \f, \t and the plain space. Non-breaking spaces are NOT white space here. */
function isWhiteSpace(c) {
	return c == '\n' || c == '\r' || c == '\v' || c == '\f' || c == '\t' || c == ' ';
}

/**
 * Like QString::simplified(), except that it preserves non-breaking spaces (which
 * Evernote uses for indentation, etc.)
 *
 * Each run of white space (as defined by isWhiteSpace) is reduced to its first character,
 * then white space is removed from both ends.
 *
 * @param {string} s
 * @returns {string}
 */
function simplifyString(s) {
	let output = '';
	let previousWhite = false;
	for (const c of s) {
		const isWhite = isWhiteSpace(c);
		if (!(previousWhite && isWhite)) output += c;
		previousWhite = isWhite;
	}

	// Trim with indexes and a single slice instead of copying the string once per removed character
	let start = 0;
	let end = output.length;
	while (start < end && isWhiteSpace(output[start])) start++;
	while (end > start && isWhiteSpace(output[end - 1])) end--;
	return output.slice(start, end);
}

/**
 * Appends `text` to `lines`.
 *
 * Inside code (state.inCode not empty) the text is appended untouched. Otherwise newlines are
 * removed, white space is collapsed, and a leading/trailing space is represented by a separate
 * SPACE token. Inside a quote (state.inQuote) a "> " item is pushed before the text.
 *
 * @param {string[]} lines - Array to append to (modified in place).
 * @param {Object} state - Parser state; only `inCode` and `inQuote` are read.
 * @param {string} text
 * @returns {string[]} The same `lines` array.
 */
function collapseWhiteSpaceAndAppend(lines, state, text) {
	if (state.inCode.length) {
		lines.push(text);

		// state.currentCode += text;

		// let previous = lines.length ? lines[lines.length - 1] : '';

		// // If the preceding item is a block limit, then the current line should start with a TAB
		// if ([BLOCK_OPEN, BLOCK_CLOSE, NEWLINE, NEWLINE_MERGED, MONOSPACE_OPEN, MONOSPACE_CLOSE].indexOf(previous) >= 0 || !previous) {
		// 	//text = "\t" + text;
		// 	lines.push('\t');
		// 	lines.push(text);
		// } else {
		// 	// If the current text contains one or more \n, then the last one should be immediately followed by a TAB
		// 	const idx = text.lastIndexOf('\n');
		// 	if (idx >= 0) {
		// 		text = text.substr(0, idx+1) + '\t' + text.substr(idx+1);
		// 	}

		// 	lines.push(text);
		// }
		return lines;
	}

	// console.info(lines);

	// Remove all \n and \r from the left and right of the text
	let cleaned = text.replace(/^[\n\r]+|[\n\r]+$/g, '');

	// Replace the inner \n with a space
	cleaned = cleaned.replace(/[\n\r]+/g, ' ');

	// Collapse all white spaces to just one. If there are spaces to the left and right of the string
	// also collapse them to just one space.
	const spaceLeft = cleaned.startsWith(' ');
	const spaceRight = cleaned.endsWith(' ');
	cleaned = simplifyString(cleaned);

	if (!spaceLeft && !spaceRight && cleaned == "") return lines;

	if (state.inQuote) {
		// Add a ">" at the beginning of the block. At this point the text cannot contain \n or \r
		// any more (they were all replaced above), so there are no inner lines left to prefix.
		// The previous code here looked for '\r' in the `lines` array instead of the text and then
		// rewrote newlines that no longer existed; that dead code has been removed.
		lines.push('> ');
	}

	if (spaceLeft) lines.push(SPACE);
	lines.push(cleaned);
	if (spaceRight) lines.push(SPACE);

	return lines;
}

const imageMimeTypes = ["image/cgm", "image/fits", "image/g3fax", "image/gif", "image/ief", "image/jp2", "image/jpeg", "image/jpm", "image/jpx", "image/naplps", "image/png", "image/prs.btif", "image/prs.pti", "image/t38", "image/tiff", "image/tiff-fx", "image/vnd.adobe.photoshop", "image/vnd.cns.inf2", "image/vnd.djvu", "image/vnd.dwg", "image/vnd.dxf", "image/vnd.fastbidsheet", "image/vnd.fpx", "image/vnd.fst", "image/vnd.fujixerox.edmics-mmr", "image/vnd.fujixerox.edmics-rlc", "image/vnd.globalgraphics.pgb", "image/vnd.microsoft.icon", "image/vnd.mix", "image/vnd.ms-modi", "image/vnd.net-fpx", "image/vnd.sealed.png", "image/vnd.sealedmedia.softseal.gif", "image/vnd.sealedmedia.softseal.jpg", "image/vnd.svf", "image/vnd.wap.wbmp", "image/vnd.xiff"];

/** True if `m` is one of the MIME types listed in imageMimeTypes. */
function isImageMimeType(m) {
	return imageMimeTypes.includes(m);
}

/**
 * Appends the Markdown tag for a resource to `lines`: "![alt](:/id)" for images,
 * "[alt](:/id)" for anything else. The tag is pushed as three separate items.
 *
 * @param {string[]} lines - Array to append to (modified in place).
 * @param {Object} resource - Object providing `mime`, `id` and optionally `alt`.
 * @param {string} [alt=""] - Alternative text; when empty, `resource.alt` is used.
 * @returns {string[]} The same `lines` array.
 */
function addResourceTag(lines, resource, alt = "") {
	// TODO: refactor to use Resource.markdownTag

	const tagAlt = (alt === "" ? resource.alt : alt) || '';
	const opening = isImageMimeType(resource.mime) ? "![" : "[";

	lines.push(opening);
	lines.push(tagAlt);
	lines.push("](:/" + resource.id + ")");

	return lines;
}

function isBlockTag(n) {
	return ["div", "p", "dl", "dd", 'dt', "center", 'address', 'form', 'input', 'section', 'nav', 'header', 'article', 'textarea', 'footer', 'fieldset', 'summary', 'details'].includes(n);
}

function isStrongTag(n) {
	return n == "strong" || n == "b" || n == 'big';
}

function isStrikeTag(n) {
	return n == "strike" || n == "s" || n == 'del';
}

function isEmTag(n) {
	return n == "em" || n == "i" || n == "u";
}

function isAnchor(n) {
	return n == "a";
}

// End tags for which the caller has nothing to do
function isIgnoredEndTag(n) {
	return ["en-note", "en-todo", "span", "body", "html", "font", "br", 'hr', 'tbody', 'sup', 'img', 'abbr', 'cite', 'thead', 'small', 'tt', 'sub', 'colgroup', 'col', 'ins', 'caption', 'var', 'map', 'area', 'label', 'legend', 'time-ago', 'relative-time'].includes(n);
}

function isListTag(n) {
	return n == "ol" || n == "ul";
}

// Elements that don't require any special treatment beside adding a newline character
function isNewLineOnlyEndTag(n) {
	return ["div", "p", "h1", "h2", "h3", "h4", "h5", 'h6', "dl", "dd", 'dt', "center", 'address', 'form', 'input', 'section', 'nav', 'header', 'article', 'textarea', 'footer', 'fieldset', 'summary', 'details'].includes(n);
}

// Tags that must be ignored - both the tag and its content.
/**
 * Returns true when `n` is one of: script, style, iframe, select, option, button, video,
 * source, svg, path.
 */
function isIgnoredContentTag(n) {
	return ['script', 'style', 'iframe', 'select', 'option', 'button', 'video', 'source', 'svg', 'path'].indexOf(n) >= 0
}

/** Returns true when `n` is "samp" or "kbd". */
function isInlineCodeTag(n) {
	return ['samp', 'kbd'].indexOf(n) >= 0;
}

/** Returns true when `s` is the BLOCK_OPEN or the BLOCK_CLOSE marker token. */
function isNewLineBlock(s) {
	return s == BLOCK_OPEN || s == BLOCK_CLOSE;
}

/**
 * Returns the first element of `xmlNode`, or '' when `xmlNode` is falsy or has no length.
 */
function xmlNodeText(xmlNode) {
	if (!xmlNode || !xmlNode.length) return '';
	return xmlNode[0];
}

/**
 * Returns a new object holding the own properties of `node.attributes`, with every key
 * converted to lower case. Returns {} when the node has no `attributes`.
 */
function attributeToLowerCase(node) {
	if (!node.attributes) return {};
	let output = {};
	for (let n in node.attributes) {
		// Skip properties inherited through the prototype chain
		if (!node.attributes.hasOwnProperty(n)) continue;
		output[n.toLowerCase()] = node.attributes[n];
	}
	return output;
}

/**
 * Parses `url` with Node's `url.parse()` and returns its protocol and host joined by "//"
 * (everything after the host is left out).
 */
function urlWithoutPath(url) {
	const parsed = require('url').parse(url, true);
	return parsed.protocol + '//' + parsed.host;
}

/** Parses `url` with Node's `url.parse()` and returns its `protocol` field. */
function urlProtocol(url) {
	const parsed = require('url').parse(url, true);
	return parsed.protocol;
}

// Matches a "scheme://" sequence.
// NOTE(review): the pattern is not anchored with ^, so it also matches when such a sequence
// appears later in the string (for example inside a query parameter) - confirm this is intended.
const schemeRegex = /[a-zA-Z0-9\+\-\.]+:\/\//

/**
 * Resolves `url` against `baseUrl`.
 *
 * - A falsy `url` is treated as ''.
 * - When `baseUrl` is falsy, or `url` matches schemeRegex, `url` is returned unchanged.
 * - A URL starting with "//" gets the protocol of `baseUrl` prepended.
 * - A URL starting with "/" gets the protocol and host of `baseUrl` prepended.
 * - Anything else is appended to `baseUrl` after a "/".
 *
 * Make sure baseUrl doesn't end with a slash
 */
function prependBaseUrl(url, baseUrl) {
	if (!url) url = '';
	if (!baseUrl) return url;
	const matches = schemeRegex.exec(url);
	if (matches) return url; // Don't prepend the base URL if the URL already has a scheme

	if (url.length >= 2 && url.indexOf('//') === 0) { // If it starts with // it's a protocol-relative URL
		return urlProtocol(baseUrl) + url;
	} else if (url && url[0] === '/') { // If it starts with a slash, it's an absolute URL so it should be relative to the domain (and not to the full baseUrl)
		return urlWithoutPath(baseUrl) + url;
	} else {
		return baseUrl + '/' + url;
	}
}

function enexXmlToMdArray(stream, resources, options = {}) {
	if (options.baseUrl) options.baseUrl = options.baseUrl.replace(/[\/]+$/, '');

	let remainingResources = resources.slice();

	// NOTE(review): splicing while iterating forward means the element right after a removed
	// one is not examined; this only matters if two consecutive resources share the same id.
	const removeRemainingResource = (id) => {
		for (let i = 0; i < remainingResources.length; i++) {
			const r = remainingResources[i];
			if (r.id === id) {
				remainingResources.splice(i, 1);
			}
		}
	}

	return new Promise((resolve, reject) => {
		let state = {
			inCode: [],
			inPre: false,
			inQuote: false,
			inMonospaceFont: false,
			inCodeblock: 0,
			lists: [],
			anchorAttributes: [],
			ignoreContents: [],
			ignoreWhiteSpace: [],
			warningsTags: [],
		};

		// In some cases white space should be ignored. For example, this:
		//