Clipper: Fixed issue with relative links when importing HTML

pull/1742/head
Laurent Cozic 2019-07-15 00:44:45 +01:00
parent 5460a977b1
commit 74ee629266
2 changed files with 28 additions and 6 deletions

View File

@ -1,5 +1,13 @@
const jsdom = require("jsdom");
const { JSDOM } = jsdom;
const urlUtils = require('lib/urlUtils.js');
/**
 * Serialises a document as the inner HTML of its head followed by the inner
 * HTML of its body, joined by a newline. The enclosing <head> and <body> tags
 * themselves are not part of the result. A missing (falsy) head or body is
 * simply left out.
 *
 * @param {Object} doc - Document-like object exposing optional `head` and `body` elements.
 * @returns {string} The concatenated inner HTML, or an empty string if neither section exists.
 */
function headAndBodyHtml_(doc) {
	return [doc.head, doc.body]
		.filter(section => section)
		.map(section => section.innerHTML)
		.join('\n');
}
const htmlUtils = {
@ -34,11 +42,22 @@ const htmlUtils = {
// This function returns the head and body but without the <head> and <body>
// tags, which for our purposes are not needed and can cause issues when present.
const output = [];
if (doc.head) output.push(doc.head.innerHTML);
if (doc.body) output.push(doc.body.innerHTML);
return output.join('\n');
},
return headAndBodyHtml_(doc);
},
prependBaseUrl(html, baseUrl) {
const dom = new JSDOM(html);
const doc = dom.window.document;
const anchors = doc.getElementsByTagName('a');
for (const anchor of anchors) {
const href = anchor.getAttribute('href');
const newHref = urlUtils.prependBaseUrl(href, baseUrl);
anchor.setAttribute('href', newHref);
}
return headAndBodyHtml_(doc);
},
};

View File

@ -432,6 +432,8 @@ class Api {
if (requestNote.id) output.id = requestNote.id;
const baseUrl = requestNote.base_url ? requestNote.base_url : '';
if (requestNote.body_html) {
if (requestNote.convert_to === 'html') {
const style = await this.buildNoteStyleSheet_(requestNote.stylesheets);
@ -446,13 +448,14 @@ class Api {
// means a code block in Markdown.
collapseWhitespace: true,
});
output.body = htmlUtils.prependBaseUrl(output.body, baseUrl);
output.markup_language = Note.MARKUP_LANGUAGE_HTML;
} else { // Convert to Markdown
// Parsing will not work if the HTML is not wrapped in a top-level tag, which is not guaranteed
// when getting the content from elsewhere. So wrap it here - this doesn't change anything in the final
// rendering but it makes sure everything will be parsed.
output.body = await this.htmlToMdParser().parse('<div>' + requestNote.body_html + '</div>', {
baseUrl: requestNote.base_url ? requestNote.base_url : '',
baseUrl: baseUrl,
anchorNames: requestNote.anchor_names ? requestNote.anchor_names : [],
});
output.markup_language = Note.MARKUP_LANGUAGE_MARKDOWN;