Started rewriting the search engine to better support Unicode

pull/1141/head
Laurent Cozic 2018-12-29 20:19:18 +01:00
parent f308fe71f9
commit 41155f5ef4
7 changed files with 123 additions and 8 deletions

View File

@ -31,6 +31,7 @@ npm test tests-build/models_Note.js
npm test tests-build/models_Tag.js
npm test tests-build/models_Setting.js
npm test tests-build/pathUtils.js
npm test tests-build/StringUtils.js
npm test tests-build/services_InteropService.js
npm test tests-build/services_ResourceService.js
npm test tests-build/urlUtils.js

View File

@ -16,6 +16,7 @@ describe('StringUtils', function() {
it('should surround keywords with strings', async (done) => {
const testCases = [
[[], 'test', 'a', 'b', 'test'],
[['test'], 'test', 'a', 'b', 'atestb'],
[['test'], 'Test', 'a', 'b', 'aTestb'],
[['te[]st'], 'Te[]st', 'a', 'b', 'aTe[]stb'],

View File

@ -28,27 +28,32 @@ describe('services_SearchEngine', function() {
n1 = await Note.save({ title: "a" });
n2 = await Note.save({ title: "b" });
await engine.syncTables();
rows = await engine.search('a');
expect(rows.length).toBe(1);
expect(rows[0].title).toBe('a');
await Note.delete(n1.id);
await engine.syncTables();
rows = await engine.search('a');
expect(rows.length).toBe(0);
rows = await engine.search('b');
expect(rows[0].title).toBe('b');
await Note.save({ id: n2.id, title: 'c' });
await engine.syncTables();
rows = await engine.search('b');
expect(rows.length).toBe(0);
rows = await engine.search('c');
expect(rows[0].title).toBe('c');
await Note.save({ id: n2.id, encryption_applied: 1 });
await engine.syncTables();
rows = await engine.search('c');
expect(rows.length).toBe(0);
await Note.save({ id: n2.id, encryption_applied: 0 });
await engine.syncTables();
rows = await engine.search('c');
expect(rows.length).toBe(1);
@ -60,6 +65,7 @@ describe('services_SearchEngine', function() {
const n2 = await Note.save({ title: "abcd aaaaa abcd abcd" }); // 1
const n3 = await Note.save({ title: "abcd aaaaa bbbb eeee abcd" }); // 2
await engine.syncTables();
const rows = await engine.search('abcd');
expect(rows[0].id).toBe(n2.id);
@ -81,6 +87,7 @@ describe('services_SearchEngine', function() {
// 5
const n5 = await Note.save({ title: "occurence many times but very abcd spread appart spread appart spread appart spread appart spread appart efgh occurence many times but very abcd spread appart spread appart spread appart spread appart spread appart efgh occurence many times but very abcd spread appart spread appart spread appart spread appart spread appart efgh occurence many times but very abcd spread appart spread appart spread appart spread appart spread appart efgh occurence many times but very abcd spread appart spread appart spread appart spread appart spread appart efgh" });
await engine.syncTables();
const rows = await engine.search('abcd efgh');
expect(rows[0].id).toBe(n1.id);
@ -97,6 +104,11 @@ describe('services_SearchEngine', function() {
const n1 = await Note.save({ title: "abcd efgh ijkl", body: "aaaa bbbb" });
const n2 = await Note.save({ title: "iiii efgh bbbb", body: "aaaa bbbb" });
const n3 = await Note.save({ title: "Агентство Рейтер" });
const n4 = await Note.save({ title: "Dog" });
const n5 = await Note.save({ title: "СООБЩИЛО" });
await engine.syncTables();
rows = await engine.search('abcd ijkl');
expect(rows.length).toBe(1);
@ -122,6 +134,21 @@ describe('services_SearchEngine', function() {
rows = await engine.search('body:bbbb iiii');
expect(rows.length).toBe(1);
rows = await engine.search('Рейтер');
expect(rows.length).toBe(1);
rows = await engine.search('pейтер');
expect(rows.length).toBe(1);
rows = await engine.search('Dog');
expect(rows.length).toBe(1);
rows = await engine.search('dog');
expect(rows.length).toBe(1);
rows = await engine.search('сообщило');
expect(rows.length).toBe(1);
done();
});
@ -172,12 +199,6 @@ describe('services_SearchEngine', function() {
const r = shouldMatch[j].match(regex);
expect(!!r).toBe(true, '"' + input + '" should match "' + shouldMatch[j] + '"');
}
// for (let j = 0; j < shouldNotMatch.length; j++) {
// const r = shouldNotMatch[j].match(regex);
// // console.info(input, shouldNotMatch)
// expect(!!r).toBe(false, '"' + input + '" should not match "' + shouldNotMatch[j] + '"');
// }
}
expect(engine.parseQuery('*').termCount).toBe(0);

View File

@ -137,6 +137,7 @@ async function clearDatabase(id = null) {
'DELETE FROM settings',
'DELETE FROM deleted_items',
'DELETE FROM sync_items',
'DELETE FROM notes_normalized',
];
await databases_[id].transactionExecBatch(queries);

View File

@ -474,13 +474,54 @@ class JoplinDatabase extends Database {
END;`);
}
// Database v16: add the notes_normalized table and rebuild notes_fts as an
// FTS4 external-content table backed by it, together with the triggers that
// keep the two in sync. All statements are only queued here.
if (targetVersion == 16) {
// Table with id/title/body columns that the FTS index below uses as its content table.
const notesNormalized = `
CREATE TABLE notes_normalized (
id TEXT NOT NULL,
title TEXT NOT NULL DEFAULT "",
body TEXT NOT NULL DEFAULT ""
);
`;
// Only the first element returned by sqlStringToLines() is queued
// (presumably the single CREATE TABLE statement - confirm against the helper).
queries.push(this.sqlStringToLines(notesNormalized)[0]);
queries.push('CREATE INDEX notes_normalized_id ON notes_normalized (id)');
// Drop any triggers and FTS table already using these names so they can be
// re-created below (presumably left over from the previous schema version - confirm).
queries.push('DROP TRIGGER IF EXISTS notes_fts_before_update');
queries.push('DROP TRIGGER IF EXISTS notes_fts_before_delete');
queries.push('DROP TRIGGER IF EXISTS notes_after_update');
queries.push('DROP TRIGGER IF EXISTS notes_after_insert');
queries.push('DROP TABLE IF EXISTS notes_fts');
// External-content FTS4 table reading from notes_normalized; the id column is
// declared but marked notindexed.
queries.push('CREATE VIRTUAL TABLE notes_fts USING fts4(content="notes_normalized", notindexed="id", id, title, body)');
// Keep the content table (notes_normalized) and the FTS table (notes_fts) in sync.
// More info at https://www.sqlite.org/fts3.html#_external_content_fts4_tables_
// Before a notes_normalized row is updated, remove its current FTS entry (matched by rowid).
queries.push(`
CREATE TRIGGER notes_fts_before_update BEFORE UPDATE ON notes_normalized BEGIN
DELETE FROM notes_fts WHERE docid=old.rowid;
END;`);
// Before a notes_normalized row is deleted, remove its FTS entry.
queries.push(`
CREATE TRIGGER notes_fts_before_delete BEFORE DELETE ON notes_normalized BEGIN
DELETE FROM notes_fts WHERE docid=old.rowid;
END;`);
// After an update, insert the new row content into the FTS table under the same rowid.
queries.push(`
CREATE TRIGGER notes_after_update AFTER UPDATE ON notes_normalized BEGIN
INSERT INTO notes_fts(docid, id, title, body) SELECT rowid, id, title, body FROM notes_normalized WHERE new.rowid = notes_normalized.rowid;
END;`);
// After an insert, add the newly created row to the FTS table.
queries.push(`
CREATE TRIGGER notes_after_insert AFTER INSERT ON notes_normalized BEGIN
INSERT INTO notes_fts(docid, id, title, body) SELECT rowid, id, title, body FROM notes_normalized WHERE new.rowid = notes_normalized.rowid;
END;`);
}
queries.push({ sql: 'UPDATE version SET version = ?', params: [targetVersion] });
try {
await this.transactionExecBatch(queries);
} catch (error) {
if (targetVersion === 15) {
this.logger().warn('Could not upgrade to database v15 - FTS feature will not be used', error);
if (targetVersion === 15 || targetVersion === 16) {
this.logger().warn('Could not upgrade to database v15 or v16 - FTS feature will not be used', error);
} else {
throw error;
}

View File

@ -36,6 +36,54 @@ class SearchEngine {
return this.db_;
}
async syncTables() {
this.logger().info('SearchEngine: Updating FTS table...');
await ItemChange.waitForAllSaved();
const startTime = Date.now();
let lastChangeId = Setting.value('searchEngine.lastProcessedChangeId');
// TODO: if lastChangedid is undefined - index the whole notes table
while (true) {
const changes = await ItemChange.modelSelectAll(`
SELECT id, item_id, type
FROM item_changes
WHERE item_type = ?
AND id > ?
ORDER BY id ASC
LIMIT 100
`, [BaseModel.TYPE_NOTE, lastChangeId]);
if (!changes.length) break;
const queries = [];
for (let i = 0; i < changes.length; i++) {
const change = changes[i];
if (change.type === ItemChange.TYPE_CREATE || change.type === ItemChange.TYPE_UPDATE) {
queries.push({ sql: 'DELETE FROM notes_normalized WHERE id = ?', params: [change.item_id] });
queries.push({ sql: 'INSERT INTO notes_normalized(id, title, body) SELECT id, title, body FROM notes WHERE id = ? AND is_conflict = 0 AND encryption_applied = 0', params: [change.item_id] });
} else if (change.type === ItemChange.TYPE_DELETE) {
queries.push({ sql: 'DELETE FROM notes_normalized WHERE id = ?', params: [change.item_id] });
} else {
throw new Error('Invalid change type: ' + change.type);
}
lastChangeId = change.id;
}
await this.db().transactionExecBatch(queries);
Setting.setValue('searchEngine.lastProcessedChangeId', lastChangeId);
await Setting.saveAll();
}
this.logger().info('SearchEngine: Updated FTS table in ' + (Date.now() - startTime) + 'ms');
}
async countRows() {
const sql = 'SELECT count(*) as total FROM notes_fts'
const row = await this.db().selectOne(sql);

View File

@ -229,6 +229,8 @@ function pregQuote(str, delimiter = '') {
}
function surroundKeywords(keywords, text, prefix, suffix) {
if (!keywords.length) return text;
let regexString = keywords.map((k) => {
if (k.type === 'regex') {
return k.value;