mirror of https://github.com/laurent22/joplin.git
Desktop, Cli: Fix ENEX import issue
Ref: https://discourse.joplinapp.org/t/error-importing-notes-from-format-enex/35001
pull/9691/head
parent
4e8863d81f
commit
20b1c2e7cb
|
@ -6,16 +6,16 @@ const os = require('os');
|
|||
const { filename } = require('./path-utils');
|
||||
import { setupDatabaseAndSynchronizer, switchClient, expectNotThrow, supportDir, expectThrow } from './testing/test-utils';
|
||||
const { enexXmlToMd } = require('./import-enex-md-gen.js');
|
||||
import importEnex from './import-enex';
|
||||
import importEnex, { ImportOptions } from './import-enex';
|
||||
import Note from './models/Note';
|
||||
import Tag from './models/Tag';
|
||||
import Resource from './models/Resource';
|
||||
|
||||
const enexSampleBaseDir = `${supportDir}/../enex_to_md`;
|
||||
|
||||
const importEnexFile = async (filename: string) => {
|
||||
const importEnexFile = async (filename: string, options: ImportOptions = null) => {
|
||||
const filePath = `${enexSampleBaseDir}/${filename}`;
|
||||
await importEnex('', filePath);
|
||||
await importEnex('', filePath, options);
|
||||
};
|
||||
|
||||
const readExpectedFile = async (filename: string) => {
|
||||
|
@ -221,7 +221,7 @@ describe('import-enex-md-gen', () => {
|
|||
});
|
||||
|
||||
it('should resolve note links', async () => {
|
||||
await importEnexFile('linked_notes.enex');
|
||||
await importEnexFile('linked_notes.enex', { batchSize: 1 });
|
||||
const notes: NoteEntity[] = await Note.all();
|
||||
|
||||
const note1 = notes.find(n => n.title === 'Note 1');
|
||||
|
|
|
@ -58,7 +58,6 @@ interface ParserState {
|
|||
spanAttributes: string[];
|
||||
tags: ParserStateTag[];
|
||||
currentCode?: string;
|
||||
evernoteLinkTitles: Record<string, string>;
|
||||
}
|
||||
|
||||
|
||||
|
@ -608,7 +607,6 @@ function enexXmlToMdArray(stream: any, resources: ResourceEntity[], tasks: Extra
|
|||
anchorAttributes: [],
|
||||
spanAttributes: [],
|
||||
tags: [],
|
||||
evernoteLinkTitles: {},
|
||||
};
|
||||
|
||||
const options = {};
|
||||
|
|
|
@ -1,10 +1,8 @@
|
|||
import uuid from './uuid';
|
||||
import BaseModel from './BaseModel';
|
||||
import Note from './models/Note';
|
||||
import Tag from './models/Tag';
|
||||
import Resource from './models/Resource';
|
||||
import Setting from './models/Setting';
|
||||
import time from './time';
|
||||
import shim from './shim';
|
||||
import { NoteEntity, ResourceEntity } from './services/database/types';
|
||||
import { enexXmlToMd } from './import-enex-md-gen';
|
||||
|
@ -15,7 +13,6 @@ import { extractUrls as extractUrlsFromMarkdown } from '@joplin/utils/markdown';
|
|||
const moment = require('moment');
|
||||
const { wrapError } = require('./errorUtils');
|
||||
const { enexXmlToHtml } = require('./import-enex-html-gen.js');
|
||||
const Levenshtein = require('levenshtein');
|
||||
const md5 = require('md5');
|
||||
const { Base64Decode } = require('base64-stream');
|
||||
const md5File = require('md5-file');
|
||||
|
@ -96,38 +93,6 @@ function removeUndefinedProperties(note: NoteEntity) {
|
|||
return output;
|
||||
}
|
||||
|
||||
/**
 * Returns the Levenshtein edit distance between `s1` and `s2` expressed as a
 * fraction of the length of `s1`. 0 means the strings are identical; the
 * larger the value, the less similar the strings are.
 *
 * @param s1 Reference string; its length is used to normalise the distance.
 * @param s2 String compared against `s1`.
 * @returns 1 when either string is empty, otherwise `distance / s1.length`.
 */
function levenshteinPercent(s1: string, s2: string) {
	// Guard before building the Levenshtein object so that no distance is
	// computed for empty input, whose result would be discarded anyway.
	if (!s1.length || !s2.length) return 1;

	const l = new Levenshtein(s1, s2);
	return Math.abs(l.distance / s1.length);
}
|
||||
|
||||
/**
 * Looks for an existing, non-conflict note that matches `note`.
 *
 * Notes created more than 360 days ago are matched on exact creation time and
 * title, and only a unique hit is accepted. More recent notes are matched on
 * creation time alone; when several candidates share that timestamp, the one
 * whose title is closest (per `levenshteinPercent`) wins, provided its score
 * is below 0.2.
 *
 * @returns The matching note row, or null when there is no acceptable match.
 */
async function fuzzyMatch(note: ExtractedNote) {
	const oldNoteCutoff = time.unixMs() - 1000 * 60 * 60 * 24 * 360;

	if (note.created_time < oldNoteCutoff) {
		const exactMatches = await Note.modelSelectAll('SELECT * FROM notes WHERE is_conflict = 0 AND created_time = ? AND title = ?', [note.created_time, note.title]);
		return exactMatches.length === 1 ? exactMatches[0] : null;
	}

	const candidates = await Note.modelSelectAll('SELECT * FROM notes WHERE is_conflict = 0 AND created_time = ?', [note.created_time]);
	if (candidates.length <= 1) return candidates.length ? candidates[0] : null;

	// Several notes share the timestamp: keep the one with the closest title.
	let bestScore = 1;
	let bestCandidate = null;
	for (const candidate of candidates) {
		const score = levenshteinPercent(note.title, candidate.title);
		if (score < bestScore) {
			bestScore = score;
			bestCandidate = candidate;
		}
	}

	return bestCandidate && bestScore < 0.2 ? bestCandidate : null;
}
|
||||
|
||||
interface ExtractedResource {
|
||||
hasData?: boolean;
|
||||
id?: string;
|
||||
|
@ -155,6 +120,14 @@ interface ExtractedNote extends NoteEntity {
|
|||
bodyXml?: string;
|
||||
}
|
||||
|
||||
// These are the notes that have been parsed and saved to Joplin. We don't keep
|
||||
// in memory the whole `ExtractedNote` because it contains resource data, etc.
|
||||
// We only keep what is needed to restore the note links.
|
||||
interface SavedNote {
|
||||
// ID of the note as it was saved.
id: string;
|
||||
// Body of the saved note; restoreNoteLinks() rewrites the note links in it.
body: string;
|
||||
}
|
||||
|
||||
// At this point we have the resource as it's been parsed from the XML, but
|
||||
// additional processing needs to be done to get the final resource file, its
|
||||
// size, MD5, etc.
|
||||
|
@ -245,26 +218,19 @@ async function saveNoteTags(note: ExtractedNote) {
|
|||
return notesTagged;
|
||||
}
|
||||
|
||||
interface ImportOptions {
|
||||
fuzzyMatching?: boolean;
|
||||
export interface ImportOptions {
|
||||
// eslint-disable-next-line @typescript-eslint/ban-types -- Old code before rule was applied
|
||||
onProgress?: Function;
|
||||
// eslint-disable-next-line @typescript-eslint/ban-types -- Old code before rule was applied
|
||||
onError?: Function;
|
||||
outputFormat?: string;
|
||||
batchSize?: number;
|
||||
}
|
||||
|
||||
async function saveNoteToStorage(note: ExtractedNote, importOptions: ImportOptions) {
|
||||
importOptions = { fuzzyMatching: false, ...importOptions };
|
||||
|
||||
async function saveNoteToStorage(note: ExtractedNote) {
|
||||
note = Note.filter(note as any);
|
||||
|
||||
const existingNote = importOptions.fuzzyMatching ? await fuzzyMatch(note) : null;
|
||||
|
||||
const result = {
|
||||
noteCreated: false,
|
||||
noteUpdated: false,
|
||||
noteSkipped: false,
|
||||
resourcesCreated: 0,
|
||||
notesTagged: 0,
|
||||
};
|
||||
|
@ -275,28 +241,10 @@ async function saveNoteToStorage(note: ExtractedNote, importOptions: ImportOptio
|
|||
const notesTagged = await saveNoteTags(note);
|
||||
result.notesTagged += notesTagged;
|
||||
|
||||
if (existingNote) {
|
||||
const diff = BaseModel.diffObjects(existingNote, note);
|
||||
delete diff.tags;
|
||||
delete diff.resources;
|
||||
delete diff.id;
|
||||
|
||||
if (!Object.getOwnPropertyNames(diff).length) {
|
||||
result.noteSkipped = true;
|
||||
return result;
|
||||
}
|
||||
|
||||
diff.id = existingNote.id;
|
||||
diff.type_ = existingNote.type_;
|
||||
await Note.save(diff, { autoTimestamp: false });
|
||||
result.noteUpdated = true;
|
||||
} else {
|
||||
await Note.save(note, {
|
||||
isNew: true,
|
||||
autoTimestamp: false,
|
||||
});
|
||||
result.noteCreated = true;
|
||||
}
|
||||
await Note.save(note, {
|
||||
isNew: true,
|
||||
autoTimestamp: false,
|
||||
});
|
||||
|
||||
return result;
|
||||
}
|
||||
|
@ -345,12 +293,47 @@ const preProcessFile = async (filePath: string): Promise<string> => {
|
|||
// return newFilePath;
|
||||
};
|
||||
|
||||
export default async function importEnex(parentFolderId: string, filePath: string, importOptions: ImportOptions = null) {
|
||||
if (!importOptions) importOptions = {};
|
||||
if (!('fuzzyMatching' in importOptions)) importOptions.fuzzyMatching = false;
|
||||
if (!('onProgress' in importOptions)) importOptions.onProgress = function() {};
|
||||
if (!('onError' in importOptions)) importOptions.onError = function() {};
|
||||
|
||||
/**
 * Converts the Evernote note links found in the saved notes to Joplin note
 * links (`:/<note id>`).
 *
 * A link is only rewritten when its title maps to exactly one note ID in
 * `noteTitlesToIds`; when there is no match, or more than one, the Evernote
 * link is left as is. Notes whose body had at least one link resolved are
 * saved again with a fresh `updated_time`.
 *
 * @param notes Saved notes to process; their `body` is updated in place.
 * @param noteTitlesToIds Map of note title to the IDs of the notes with that title.
 * @param importOptions Import options; `outputFormat` selects HTML or Markdown link extraction.
 */
const restoreNoteLinks = async (notes: SavedNote[], noteTitlesToIds: Record<string, string[]>, importOptions: ImportOptions) => {
	for (const savedNote of notes) {
		let links;
		if (importOptions.outputFormat === 'html') {
			links = extractUrlsFromHtml(savedNote.body);
		} else {
			links = extractUrlsFromMarkdown(savedNote.body);
		}

		let hasResolvedLink = false;

		for (const link of links) {
			const candidateIds = noteTitlesToIds[link.title];

			// Skip unknown titles and ambiguous ones (several notes with the same title)
			if (!candidateIds || candidateIds.length !== 1) continue;

			savedNote.body = savedNote.body.replace(link.url, `:/${candidateIds[0]}`);
			hasResolvedLink = true;
		}

		if (!hasResolvedLink) continue;

		await Note.save({
			id: savedNote.id,
			body: savedNote.body,
			updated_time: Date.now(),
		}, {
			autoTimestamp: false,
		});
	}
};
|
||||
|
||||
// Value that parseNotes() resolves with once the whole file has been processed.
interface ParseNotesResult {
|
||||
// The notes that were saved (ID and body only).
savedNotes: SavedNote[];
|
||||
// Maps a note title to the IDs of all the saved notes with that title.
noteTitlesToIds: Record<string, string[]>;
|
||||
}
|
||||
|
||||
const parseNotes = async (parentFolderId: string, filePath: string, importOptions: ImportOptions = null): Promise<ParseNotesResult> => {
|
||||
// eslint-disable-next-line @typescript-eslint/ban-types -- Old code before rule was applied
|
||||
function handleSaxStreamEvent(fn: Function) {
|
||||
return function(...args: any[]) {
|
||||
|
@ -397,6 +380,9 @@ export default async function importEnex(parentFolderId: string, filePath: strin
|
|||
let noteResourceRecognition: NoteResourceRecognition = null;
|
||||
const notes: ExtractedNote[] = [];
|
||||
let processingNotes = false;
|
||||
const savedNotes: SavedNote[] = [];
|
||||
const createdNoteIds: string[] = [];
|
||||
const noteTitlesToIds: Record<string, string[]> = {};
|
||||
|
||||
const createErrorWithNoteTitle = (fnThis: any, error: any) => {
|
||||
const line = [];
|
||||
|
@ -437,15 +423,6 @@ export default async function importEnex(parentFolderId: string, filePath: strin
|
|||
processingNotes = true;
|
||||
stream.pause();
|
||||
|
||||
// Set the note ID so that we can create a title-to-id map, which
|
||||
// will be needed to recreate the note links below.
|
||||
const noteTitleToId: Record<string, string[]> = {};
|
||||
for (const note of notes) {
|
||||
if (!noteTitleToId[note.title]) noteTitleToId[note.title] = [];
|
||||
note.id = uuid.create();
|
||||
noteTitleToId[note.title].push(note.id);
|
||||
}
|
||||
|
||||
while (notes.length) {
|
||||
const note = notes.shift();
|
||||
|
||||
|
@ -467,32 +444,16 @@ export default async function importEnex(parentFolderId: string, filePath: strin
|
|||
// Convert the ENEX body to either Markdown or HTML
|
||||
// --------------------------------------------------------
|
||||
|
||||
let body: string = importOptions.outputFormat === 'html' ?
|
||||
const body: string = importOptions.outputFormat === 'html' ?
|
||||
await enexXmlToHtml(note.bodyXml, note.resources) :
|
||||
await enexXmlToMd(note.bodyXml, note.resources, note.tasks);
|
||||
delete note.bodyXml;
|
||||
|
||||
// --------------------------------------------------------
|
||||
// Convert the Evernote note links to Joplin note links. If
|
||||
// we don't find a matching note, or if there are multiple
|
||||
// matching notes, we leave the Evernote links as is.
|
||||
// --------------------------------------------------------
|
||||
|
||||
const links = importOptions.outputFormat === 'html' ?
|
||||
extractUrlsFromHtml(body) :
|
||||
extractUrlsFromMarkdown(body);
|
||||
|
||||
for (const link of links) {
|
||||
const matchingNoteIds = noteTitleToId[link.title];
|
||||
if (matchingNoteIds && matchingNoteIds.length === 1) {
|
||||
body = body.replace(link.url, `:/${matchingNoteIds[0]}`);
|
||||
}
|
||||
}
|
||||
|
||||
// --------------------------------------------------------
|
||||
// Finish setting up the note
|
||||
// --------------------------------------------------------
|
||||
|
||||
note.id = uuid.create();
|
||||
note.markup_language = importOptions.outputFormat === 'html' ?
|
||||
MarkupToHtml.MARKUP_LANGUAGE_HTML :
|
||||
MarkupToHtml.MARKUP_LANGUAGE_MARKDOWN;
|
||||
|
@ -511,15 +472,17 @@ export default async function importEnex(parentFolderId: string, filePath: strin
|
|||
// that case
|
||||
if (!note.updated_time) note.updated_time = note.created_time;
|
||||
|
||||
const result = await saveNoteToStorage(note, importOptions);
|
||||
const result = await saveNoteToStorage(note);
|
||||
|
||||
if (result.noteUpdated) {
|
||||
progressState.updated++;
|
||||
} else if (result.noteCreated) {
|
||||
progressState.created++;
|
||||
} else if (result.noteSkipped) {
|
||||
progressState.skipped++;
|
||||
}
|
||||
createdNoteIds.push(note.id);
|
||||
if (!noteTitlesToIds[note.title]) noteTitlesToIds[note.title] = [];
|
||||
noteTitlesToIds[note.title].push(note.id);
|
||||
savedNotes.push({
|
||||
id: note.id,
|
||||
body: note.body,
|
||||
});
|
||||
|
||||
progressState.created++;
|
||||
progressState.resourcesCreated += result.resourcesCreated;
|
||||
progressState.notesTagged += result.notesTagged;
|
||||
importOptions.onProgress(progressState);
|
||||
|
@ -648,7 +611,7 @@ export default async function importEnex(parentFolderId: string, filePath: strin
|
|||
|
||||
notes.push(note);
|
||||
|
||||
if (notes.length >= 10) {
|
||||
if (notes.length >= importOptions.batchSize) {
|
||||
// eslint-disable-next-line promise/prefer-await-to-then -- Old code before rule was applied
|
||||
processNotes().catch(error => {
|
||||
importOptions.onError(createErrorWithNoteTitle(this, error));
|
||||
|
@ -718,12 +681,25 @@ export default async function importEnex(parentFolderId: string, filePath: strin
|
|||
if (allDone) {
|
||||
shim.clearTimeout(iid);
|
||||
if (needToDeleteFileToProcess) void shim.fsDriver().remove(fileToProcess);
|
||||
resolve(null);
|
||||
resolve({
|
||||
savedNotes,
|
||||
noteTitlesToIds,
|
||||
});
|
||||
}
|
||||
});
|
||||
}, 500);
|
||||
}, 1000);
|
||||
}));
|
||||
|
||||
stream.pipe(saxStream);
|
||||
});
|
||||
};
|
||||
|
||||
/**
 * Imports the notes of an ENEX file into the given folder.
 *
 * The import runs in two passes: `parseNotes` parses and saves every note,
 * then `restoreNoteLinks` rewrites the links between the saved notes.
 *
 * @param parentFolderId Passed through to `parseNotes` together with `filePath`.
 * @param filePath Path of the ENEX file to import.
 * @param importOptions Optional settings. Missing, `null` or `undefined`
 *   `onProgress` / `onError` callbacks default to no-ops, and a missing or
 *   invalid `batchSize` defaults to 10.
 */
export default async function importEnex(parentFolderId: string, filePath: string, importOptions: ImportOptions = null) {
	const noop = function() {};
	const requestedBatchSize = importOptions?.batchSize;

	// Defaults are applied on a copy so the caller's object is never mutated,
	// and they also cover keys that are present but set to undefined/null
	// (a plain `'key' in options` check would leave such values in place).
	const options: ImportOptions = {
		...importOptions,
		onProgress: importOptions?.onProgress ?? noop,
		onError: importOptions?.onError ?? noop,
		// A batch size that is not a number >= 1 would disable or distort
		// batching, so fall back to the default in that case.
		batchSize: typeof requestedBatchSize === 'number' && requestedBatchSize >= 1 ? requestedBatchSize : 10,
	};

	const result = await parseNotes(parentFolderId, filePath, options);
	await restoreNoteLinks(result.savedNotes, result.noteTitlesToIds, options);
}
|
||||
|
|
|
@ -66,7 +66,6 @@
|
|||
"image-type": "3.1.0",
|
||||
"immer": "7.0.15",
|
||||
"js-yaml": "4.1.0",
|
||||
"levenshtein": "1.0.5",
|
||||
"markdown-it": "13.0.2",
|
||||
"md5": "2.3.0",
|
||||
"md5-file": "5.0.0",
|
||||
|
|
|
@ -6871,7 +6871,6 @@ __metadata:
|
|||
immer: 7.0.15
|
||||
jest: 29.7.0
|
||||
js-yaml: 4.1.0
|
||||
levenshtein: 1.0.5
|
||||
markdown-it: 13.0.2
|
||||
md5: 2.3.0
|
||||
md5-file: 5.0.0
|
||||
|
@ -27688,13 +27687,6 @@ __metadata:
|
|||
languageName: node
|
||||
linkType: hard
|
||||
|
||||
"levenshtein@npm:1.0.5":
|
||||
version: 1.0.5
|
||||
resolution: "levenshtein@npm:1.0.5"
|
||||
checksum: d5ceca3bfc4804ad50515291841d968eea5f1f740310c21b5ae6cb6d5514ee68b9c00405059f36934611d3258967bad6d306dcf299f446c7cdd25bdda2c4720c
|
||||
languageName: node
|
||||
linkType: hard
|
||||
|
||||
"levn@npm:^0.4.1":
|
||||
version: 0.4.1
|
||||
resolution: "levn@npm:0.4.1"
|
||||
|
|
Loading…
Reference in New Issue