diff --git a/packages/app-desktop/plugins/GotoAnything.tsx b/packages/app-desktop/plugins/GotoAnything.tsx index 9ee9e2352e..93dac2dce1 100644 --- a/packages/app-desktop/plugins/GotoAnything.tsx +++ b/packages/app-desktop/plugins/GotoAnything.tsx @@ -67,7 +67,6 @@ class GotoAnything { class Dialog extends React.PureComponent { - private fuzzy_: boolean; private styles_: any; private inputRef: any; private itemListRef: any; @@ -77,8 +76,6 @@ class Dialog extends React.PureComponent { constructor(props: Props) { super(props); - this.fuzzy_ = false; - const startString = props?.userData?.startString ? props?.userData?.startString : ''; this.state = { @@ -242,7 +239,7 @@ class Dialog extends React.PureComponent { } async keywords(searchQuery: string) { - const parsedQuery = await SearchEngine.instance().parseQuery(searchQuery, this.fuzzy_); + const parsedQuery = await SearchEngine.instance().parseQuery(searchQuery); return SearchEngine.instance().allParsedQueryTerms(parsedQuery); } @@ -296,7 +293,7 @@ class Dialog extends React.PureComponent { } else { // Note TITLE or BODY listType = BaseModel.TYPE_NOTE; searchQuery = this.makeSearchQuery(this.state.query); - results = await SearchEngine.instance().search(searchQuery, { fuzzy: this.fuzzy_ }); + results = await SearchEngine.instance().search(searchQuery); resultsInBody = !!results.find((row: any) => row.fields.includes('body')); diff --git a/packages/lib/BaseApplication.ts b/packages/lib/BaseApplication.ts index 403aa50b19..4ee4b9d55e 100644 --- a/packages/lib/BaseApplication.ts +++ b/packages/lib/BaseApplication.ts @@ -732,22 +732,6 @@ export default class BaseApplication { this.database_.setLogExcludedQueryTypes(['SELECT']); this.database_.setLogger(globalLogger); - // if (Setting.value('env') === 'dev') { - // if (shim.isElectron()) { - // this.database_.extensionToLoad = './lib/sql-extensions/spellfix'; - // } - // } else { - // if (shim.isElectron()) { - // if (shim.isWindows()) { - // const appDir = process.execPath.substring(0, process.execPath.lastIndexOf('\\')); - // this.database_.extensionToLoad = `${appDir}/usr/lib/spellfix`; - // } else { - // const appDir = process.execPath.substring(0, process.execPath.lastIndexOf('/')); - // this.database_.extensionToLoad = `${appDir}/usr/lib/spellfix`; - // } - // } - // } - await this.database_.open({ name: `${profileDir}/database.sqlite` }); // if (Setting.value('env') === 'dev') await this.database_.clearForTesting(); @@ -774,19 +758,6 @@ export default class BaseApplication { setLocale(Setting.value('locale')); } - // if (Setting.value('db.fuzzySearchEnabled') === -1) { - // const fuzzySearchEnabled = await this.database_.fuzzySearchEnabled(); - // Setting.setValue('db.fuzzySearchEnabled', fuzzySearchEnabled ? 1 : 0); - // } - - // // Always disable on CLI because building and packaging the extension is not working - // // and is too error-prone - requires gcc on the machine, or we should package the .so - // // and dylib files, but it's not sure it would work everywhere if not built from - // // source on the target machine. - // if (Setting.value('appType') !== 'desktop') { - // Setting.setValue('db.fuzzySearchEnabled', 0); - // } - // For now always disable fuzzy search due to performance issues: // https://discourse.joplinapp.org/t/1-1-4-keyboard-locks-up-while-typing/11231/11 // https://discourse.joplinapp.org/t/serious-lagging-when-there-are-tens-of-thousands-of-notes/11215/23 diff --git a/packages/lib/JoplinDatabase.ts b/packages/lib/JoplinDatabase.ts index 5dd5f325c6..b4ea32b8bf 100644 --- a/packages/lib/JoplinDatabase.ts +++ b/packages/lib/JoplinDatabase.ts @@ -140,8 +140,6 @@ export default class JoplinDatabase extends Database { constructor(driver: any) { super(driver); - - // this.extensionToLoad = './build/lib/sql-extensions/spellfix'; } initialized() { @@ -933,15 +931,6 @@ export default class JoplinDatabase extends Database { async initialize() { this.logger().info('Checking for database schema update...'); - // try { - // // Note that the only extension that can be loaded as of now is spellfix. - // // If it fails here, it will fail on the fuzzySearchEnabled() check above - // // too, thus disabling spellfix for the app. - // await this.loadExtension(this.extensionToLoad); - // } catch (error) { - // this.logger().error(error); - // } - let versionRow = null; try { // Will throw if the database has not been created yet, but this is handled below diff --git a/packages/lib/services/searchengine/SearchEngine.ts b/packages/lib/services/searchengine/SearchEngine.ts index bc28c9c778..7dde1a85f1 100644 --- a/packages/lib/services/searchengine/SearchEngine.ts +++ b/packages/lib/services/searchengine/SearchEngine.ts @@ -18,7 +18,6 @@ export default class SearchEngine { public static SEARCH_TYPE_AUTO = 'auto'; public static SEARCH_TYPE_BASIC = 'basic'; public static SEARCH_TYPE_FTS = 'fts'; - public static SEARCH_TYPE_FTS_FUZZY = 'fts_fuzzy'; public dispatch: Function = (_o: any) => {}; private logger_ = new Logger(); @@ -88,11 +87,6 @@ export default class SearchEngine { ); } - if (!noteIds.length && (Setting.value('db.fuzzySearchEnabled') === 1)) { - // On the last loop - queries.push({ sql: 'INSERT INTO notes_spellfix(word,rank) SELECT term, documents FROM search_aux WHERE col=\'*\'' }); - } - await this.db().transactionExecBatch(queries); } @@ -157,16 +151,9 @@ export default class SearchEngine { [BaseModel.TYPE_NOTE, lastChangeId] ); - const queries = []; + if (!changes.length) break; - if (!changes.length) { - if (Setting.value('db.fuzzySearchEnabled') === 1) { - queries.push({ sql: 'DELETE FROM notes_spellfix' }); - queries.push({ sql: 'INSERT INTO notes_spellfix(word,rank) SELECT term, documents FROM search_aux WHERE col=\'*\'' }); - await this.db().transactionExecBatch(queries); - } - break; - } + const queries = []; const noteIds = changes.map(a => a.item_id); const notes = await Note.modelSelectAll(` @@ -273,7 +260,7 @@ export default class SearchEngine { - calculateWeightBM25_(rows: any[], fuzzyScore: any) { + calculateWeightBM25_(rows: any[]) { // https://www.sqlite.org/fts3.html#matchinfo // pcnalx are the arguments passed to matchinfo // p - The number of matchable phrases in the query. @@ -352,20 +339,14 @@ export default class SearchEngine { for (let i = 0; i < rows.length; i++) { const row = rows[i]; row.weight = 0; - row.fuzziness = 1000; - row.wordFound = []; for (let j = 0; j < numPhrases; j++) { - let found = false; columns.forEach(column => { const rowsWithHits = docsWithHits(X[i], column, j); const frequencyHits = hitsThisRow(X[i], column, j); const idf = IDF(rowsWithHits, numRows); - found = found ? found : (frequencyHits > 0); row.weight += BM25(idf, frequencyHits, numTokens[column][i], avgTokens[column]); - row.fuzziness = (frequencyHits > 0) ? Math.min(row.fuzziness, fuzzyScore[j]) : row.fuzziness; }); - row.wordFound.push(found); } row.weight += weightForDaysSinceLastUpdate(row); @@ -392,35 +373,18 @@ export default class SearchEngine { } processResults_(rows: any[], parsedQuery: any, isBasicSearchResults = false) { - const rowContainsAllWords = (wordsFound: any, numFuzzyMatches: any) => { - let start = 0; - let end = 0; - for (let i = 0; i < numFuzzyMatches.length; i++) { - end = end + numFuzzyMatches[i]; - if (!(wordsFound.slice(start, end).find((x: any) => x))) { - // This note doesn't contain any fuzzy matches for the word - return false; - } - start = end; - } - return true; - }; - if (isBasicSearchResults) { this.processBasicSearchResults_(rows, parsedQuery); } else { - this.calculateWeightBM25_(rows, parsedQuery.fuzzyScore); + this.calculateWeightBM25_(rows); for (let i = 0; i < rows.length; i++) { const row = rows[i]; - row.include = (parsedQuery.fuzzy && !parsedQuery.any) ? rowContainsAllWords(row.wordFound, parsedQuery.numFuzzyMatches) : true; const offsets = row.offsets.split(' ').map((o: any) => Number(o)); row.fields = this.fieldNamesFromOffsets_(offsets); } } rows.sort((a, b) => { - if (a.fuzziness < b.fuzziness) return -1; - if (a.fuzziness > b.fuzziness) return +1; if (a.fields.includes('title') && !b.fields.includes('title')) return -1; if (!a.fields.includes('title') && b.fields.includes('title')) return +1; if (a.weight < b.weight) return +1; @@ -448,22 +412,11 @@ export default class SearchEngine { return regexString; } - async fuzzifier(words: string[]) { - const fuzzyMatches: any[] = []; - words.forEach(word => { - const fuzzyWords = this.db().selectAll('SELECT word, score FROM notes_spellfix WHERE word MATCH ? AND top=3', [word]); - fuzzyMatches.push(fuzzyWords); - }); - return await Promise.all(fuzzyMatches); - } - - async parseQuery(query: string, fuzzy: any = null) { - if (fuzzy === null) fuzzy = Setting.value('db.fuzzySearchEnabled') === 1; + async parseQuery(query: string) { const trimQuotes = (str: string) => str.startsWith('"') ? str.substr(1, str.length - 2) : str; let allTerms: any[] = []; - let allFuzzyTerms = []; try { allTerms = filterParser(query); @@ -471,81 +424,11 @@ export default class SearchEngine { console.warn(error); } - const textTerms = allTerms.filter(x => x.name === 'text' && !x.negated); - const titleTerms = allTerms.filter(x => x.name === 'title' && !x.negated); - const bodyTerms = allTerms.filter(x => x.name === 'body' && !x.negated); + const textTerms = allTerms.filter(x => x.name === 'text' && !x.negated).map(x => trimQuotes(x.value)); + const titleTerms = allTerms.filter(x => x.name === 'title' && !x.negated).map(x => trimQuotes(x.value)); + const bodyTerms = allTerms.filter(x => x.name === 'body' && !x.negated).map(x => trimQuotes(x.value)); - const fuzzyScore = []; - let numFuzzyMatches = []; - let terms: any = null; - - if (fuzzy) { - const fuzzyText = await this.fuzzifier(textTerms.filter(x => !(x.quoted || x.wildcard)).map(x => trimQuotes(x.value))); - const fuzzyTitle = await this.fuzzifier(titleTerms.filter(x => !x.wildcard).map(x => trimQuotes(x.value))); - const fuzzyBody = await this.fuzzifier(bodyTerms.filter(x => !x.wildcard).map(x => trimQuotes(x.value))); - - // Floor the fuzzy scores to 0, 1 and 2. - const floorFuzzyScore = (matches: any) => { - for (let i = 0; i < matches.length; i++) matches[i].score = i; - }; - - fuzzyText.forEach(floorFuzzyScore); - fuzzyTitle.forEach(floorFuzzyScore); - fuzzyBody.forEach(floorFuzzyScore); - - const phraseTextSearch = textTerms.filter(x => x.quoted); - const wildCardSearch = textTerms.concat(titleTerms).concat(bodyTerms).filter(x => x.wildcard); - - // Save number of fuzzy matches we got for each word - // fuzzifier() is currently set to return at most 3 matches - // We need to know which fuzzy words go together so that we can filter out notes that don't contain a required word. - numFuzzyMatches = fuzzyText.concat(fuzzyTitle).concat(fuzzyBody).map(x => x.length); - for (let i = 0; i < phraseTextSearch.length + wildCardSearch.length; i++) { - // Phrase searches and wildcard searches are preserved without fuzzification (A single match) - numFuzzyMatches.push(1); - } - - const mergedFuzzyText = [].concat.apply([], fuzzyText); - const mergedFuzzyTitle = [].concat.apply([], fuzzyTitle); - const mergedFuzzyBody = [].concat.apply([], fuzzyBody); - - const fuzzyTextTerms = mergedFuzzyText.map(x => { return { name: 'text', value: x.word, negated: false, score: x.score }; }); - const fuzzyTitleTerms = mergedFuzzyTitle.map(x => { return { name: 'title', value: x.word, negated: false, score: x.score }; }); - const fuzzyBodyTerms = mergedFuzzyBody.map(x => { return { name: 'body', value: x.word, negated: false, score: x.score }; }); - - // Remove previous text, title and body and replace with fuzzy versions - allTerms = allTerms.filter(x => (x.name !== 'text' && x.name !== 'title' && x.name !== 'body')); - - // The order matters here! - // The text goes first, then title, then body, then phrase and finally wildcard - // This is because it needs to match with numFuzzyMathches. - allFuzzyTerms = allTerms.concat(fuzzyTextTerms).concat(fuzzyTitleTerms).concat(fuzzyBodyTerms).concat(phraseTextSearch).concat(wildCardSearch); - - const allTextTerms = allFuzzyTerms.filter(x => x.name === 'title' || x.name === 'body' || x.name === 'text'); - for (let i = 0; i < allTextTerms.length; i++) { - // Phrase searches and wildcard searches will get a fuzziness score of zero. - // This means that they will go first in the sort order (Even if there are other words with matches in the title) - // Undesirable? - fuzzyScore.push(allFuzzyTerms[i].score ? allFuzzyTerms[i].score : 0); - } - - const wildCardTextTerms = wildCardSearch.filter(x => x.name === 'text').map(x =>trimQuotes(x.value)); - const wildCardTitleTerms = wildCardSearch.filter(x => x.name === 'title').map(x =>trimQuotes(x.value)); - const wildCardBodyTerms = wildCardSearch.filter(x => x.name === 'body').map(x =>trimQuotes(x.value)); - const phraseTextTerms = phraseTextSearch.map(x => trimQuotes(x.value)); - - terms = { - _: fuzzyTextTerms.map(x => trimQuotes(x.value)).concat(phraseTextTerms).concat(wildCardTextTerms), - title: fuzzyTitleTerms.map(x => trimQuotes(x.value)).concat(wildCardTitleTerms), - body: fuzzyBodyTerms.map(x => trimQuotes(x.value)).concat(wildCardBodyTerms), - }; - } else { - const nonNegatedTextTerms = textTerms.length + titleTerms.length + bodyTerms.length; - for (let i = 0; i < nonNegatedTextTerms; i++) { - fuzzyScore.push(0); - } - terms = { _: textTerms.map(x =>trimQuotes(x.value)), 'title': titleTerms.map(x =>trimQuotes(x.value)), 'body': bodyTerms.map(x =>trimQuotes(x.value)) }; - } + const terms: any = { _: textTerms, 'title': titleTerms, 'body': bodyTerms }; // Filter terms: // - Convert wildcards to regex @@ -603,10 +486,7 @@ export default class SearchEngine { termCount: termCount, keys: keys, terms: terms, // text terms - allTerms: fuzzy ? allFuzzyTerms : allTerms, - fuzzyScore: fuzzyScore, - numFuzzyMatches: numFuzzyMatches, - fuzzy: fuzzy, + allTerms: allTerms, any: !!allTerms.find(term => term.name === 'any'), }; } @@ -651,8 +531,8 @@ export default class SearchEngine { return Note.previews(null, searchOptions); } - determineSearchType_(query: string, options: any) { - if (options.searchType === SearchEngine.SEARCH_TYPE_BASIC) return SearchEngine.SEARCH_TYPE_BASIC; + determineSearchType_(query: string, preferredSearchType: any) { + if (preferredSearchType === SearchEngine.SEARCH_TYPE_BASIC) return SearchEngine.SEARCH_TYPE_BASIC; // If preferredSearchType is "fts" we auto-detect anyway // because it's not always supported. @@ -669,12 +549,9 @@ export default class SearchEngine { if (!Setting.value('db.ftsEnabled') || ['ja', 'zh', 'ko', 'th'].indexOf(st) >= 0) { return SearchEngine.SEARCH_TYPE_BASIC; - } else if (options.fuzzy) { - return SearchEngine.SEARCH_TYPE_FTS_FUZZY; - } else { - return SearchEngine.SEARCH_TYPE_FTS; } + return SearchEngine.SEARCH_TYPE_FTS; } async search(searchString: string, options: any = null) { @@ -682,35 +559,30 @@ export default class SearchEngine { options = Object.assign({}, { searchType: SearchEngine.SEARCH_TYPE_AUTO, - fuzzy: Setting.value('db.fuzzySearchEnabled') === 1, }, options); - const searchType = this.determineSearchType_(searchString, options); + const searchType = this.determineSearchType_(searchString, options.searchType); + const parsedQuery = await this.parseQuery(searchString); if (searchType === SearchEngine.SEARCH_TYPE_BASIC) { // Non-alphabetical languages aren't support by SQLite FTS (except with extensions which are not available in all platforms) searchString = this.normalizeText_(searchString); const rows = await this.basicSearch(searchString); - const parsedQuery = await this.parseQuery(searchString); + this.processResults_(rows, parsedQuery, true); return rows; } else { - // SEARCH_TYPE_FTS or SEARCH_TYPE_FTS_FUZZY + // SEARCH_TYPE_FTS // FTS will ignore all special characters, like "-" in the index. So if // we search for "this-phrase" it won't find it because it will only // see "this phrase" in the index. Because of this, we remove the dashes // when searching. // https://github.com/laurent22/joplin/issues/1075#issuecomment-459258856 - const parsedQuery = await this.parseQuery(searchString, searchType === SearchEngine.SEARCH_TYPE_FTS_FUZZY); - try { - const { query, params } = queryBuilder(parsedQuery.allTerms, searchType === SearchEngine.SEARCH_TYPE_FTS_FUZZY); + const { query, params } = queryBuilder(parsedQuery.allTerms); const rows = await this.db().selectAll(query, params); this.processResults_(rows, parsedQuery); - if (searchType === SearchEngine.SEARCH_TYPE_FTS_FUZZY && !parsedQuery.any) { - return rows.filter((row: any) => row.include); - } return rows; } catch (error) { this.logger().warn(`Cannot execute MATCH query: ${searchString}: ${error.message}`); diff --git a/packages/lib/services/searchengine/queryBuilder.ts b/packages/lib/services/searchengine/queryBuilder.ts index faa75cc21a..99dac2edaf 100644 --- a/packages/lib/services/searchengine/queryBuilder.ts +++ b/packages/lib/services/searchengine/queryBuilder.ts @@ -300,7 +300,7 @@ const sourceUrlFilter = (terms: Term[], conditons: string[], params: string[], r }; -const textFilter = (terms: Term[], conditions: string[], params: string[], relation: Relation, fuzzy: Boolean) => { +const textFilter = (terms: Term[], conditions: string[], params: string[], relation: Relation) => { const addExcludeTextConditions = (excludedTerms: Term[], conditions: string[], params: string[], relation: Relation) => { const type = excludedTerms[0].name === 'text' ? '' : `.${excludedTerms[0].name}`; @@ -342,7 +342,7 @@ const textFilter = (terms: Term[], conditions: string[], params: string[], relat if (term.name === 'text') return term.value; else return `${term.name}:${term.value}`; }); - const matchQuery = (fuzzy || (relation === 'OR')) ? termsToMatch.join(' OR ') : termsToMatch.join(' '); + const matchQuery = (relation === 'OR') ? termsToMatch.join(' OR ') : termsToMatch.join(' '); params.push(matchQuery); } @@ -374,7 +374,7 @@ const getConnective = (terms: Term[], relation: Relation): string => { return (!notebookTerm && (relation === 'OR')) ? 'ROWID=-1' : '1'; // ROWID=-1 acts as 0 (something always false) }; -export default function queryBuilder(terms: Term[], fuzzy: boolean) { +export default function queryBuilder(terms: Term[]) { const queryParts: string[] = []; const params: string[] = []; const withs: string[] = []; @@ -402,7 +402,7 @@ export default function queryBuilder(terms: Term[], fuzzy: boolean) { resourceFilter(terms, queryParts, params, relation, withs); - textFilter(terms, queryParts, params, relation, fuzzy); + textFilter(terms, queryParts, params, relation); typeFilter(terms, queryParts, params, relation);