From f144daed967df661353dbae713343c9b00d2a9a3 Mon Sep 17 00:00:00 2001
From: Laurent Cozic
Date: Thu, 23 Sep 2021 15:35:49 +0100
Subject: [PATCH] Desktop, Cli: Allow importing certain corrupted ENEX files

---
 packages/lib/import-enex.ts | 33 +++++++++++++++++++++++++++++++--
 1 file changed, 31 insertions(+), 2 deletions(-)

diff --git a/packages/lib/import-enex.ts b/packages/lib/import-enex.ts
index 5ea403a582..597d4b168d 100644
--- a/packages/lib/import-enex.ts
+++ b/packages/lib/import-enex.ts
@@ -300,7 +300,32 @@ interface NoteResourceRecognition {
 	objID?: string;
 }
 
-export default function importEnex(parentFolderId: string, filePath: string, importOptions: ImportOptions = null) {
+const preProcessFile = async (filePath: string): Promise<string> => {
+	const content: string = await shim.fsDriver().readFile(filePath, 'utf8');
+
+	// The note content in an ENEX file is wrapped in a CDATA block so it means
+	// that any "]]>" inside the note must be somehow escaped, or else the CDATA
+	// block would be closed at the wrong point.
+	//
+	// The problem is that Evernote appears to encode "]]>" as "]]<![CDATA[>]]>"
+	// instead of the more sensible "]]&gt;", or perhaps they have nothing in
+	// place to properly escape data imported from their web clipper. In any
+	// case it results in invalid XML that Evernote cannot even import back.
+	//
+	// Handling that invalid XML with SAX would also be very tricky, so instead
+	// we add a pre-processing step that converts these tags to just "&gt;". It
+	// should be safe to do so because such content can only be within the body
+	// of a note - and ">" or "&gt;" is equivalent.
+	//
+	// Ref: https://discourse.joplinapp.org/t/20470/4
+	const newContent = content.replace(/<!\[CDATA\[>\]\]>/g, '&gt;');
+	if (content === newContent) return filePath;
+	const newFilePath = `${Setting.value('tempDir')}/${md5(Date.now() + Math.random())}.enex`;
+	await shim.fsDriver().writeFile(newFilePath, newContent, 'utf8');
+	return newFilePath;
+};
+
+export default async function importEnex(parentFolderId: string, filePath: string, importOptions: ImportOptions = null) {
 	if (!importOptions) importOptions = {};
 	if (!('fuzzyMatching' in importOptions)) importOptions.fuzzyMatching = false;
 	if (!('onProgress' in importOptions)) importOptions.onProgress = function() {};
@@ -323,6 +348,9 @@ export default function importEnex(parentFolderId: string, filePath: string, imp
 		};
 	}
 
+	const fileToProcess = await preProcessFile(filePath);
+	const needToDeleteFileToProcess = fileToProcess !== filePath;
+
 	return new Promise((resolve) => {
 		const progressState = {
 			loaded: 0,
@@ -333,7 +361,7 @@ export default function importEnex(parentFolderId: string, filePath: string, imp
 			notesTagged: 0,
 		};
 
-		const stream = fs.createReadStream(filePath);
+		const stream = fs.createReadStream(fileToProcess);
 
 		const options = {};
 		const strict = true;
@@ -613,6 +641,7 @@ export default function importEnex(parentFolderId: string, filePath: string, imp
 				void processNotes().then(allDone => {
 					if (allDone) {
 						shim.clearTimeout(iid);
+						if (needToDeleteFileToProcess) void shim.fsDriver().remove(fileToProcess);
 						resolve(null);
 					}
 				});