mirror of https://github.com/laurent22/joplin.git
Chore: Clean up unused fuzzy search code (#4593)
parent
ec99511397
commit
3657c0369e
|
@ -67,7 +67,6 @@ class GotoAnything {
|
|||
|
||||
class Dialog extends React.PureComponent<Props, State> {
|
||||
|
||||
private fuzzy_: boolean;
|
||||
private styles_: any;
|
||||
private inputRef: any;
|
||||
private itemListRef: any;
|
||||
|
@ -77,8 +76,6 @@ class Dialog extends React.PureComponent<Props, State> {
|
|||
constructor(props: Props) {
|
||||
super(props);
|
||||
|
||||
this.fuzzy_ = false;
|
||||
|
||||
const startString = props?.userData?.startString ? props?.userData?.startString : '';
|
||||
|
||||
this.state = {
|
||||
|
@ -242,7 +239,7 @@ class Dialog extends React.PureComponent<Props, State> {
|
|||
}
|
||||
|
||||
async keywords(searchQuery: string) {
|
||||
const parsedQuery = await SearchEngine.instance().parseQuery(searchQuery, this.fuzzy_);
|
||||
const parsedQuery = await SearchEngine.instance().parseQuery(searchQuery);
|
||||
return SearchEngine.instance().allParsedQueryTerms(parsedQuery);
|
||||
}
|
||||
|
||||
|
@ -296,7 +293,7 @@ class Dialog extends React.PureComponent<Props, State> {
|
|||
} else { // Note TITLE or BODY
|
||||
listType = BaseModel.TYPE_NOTE;
|
||||
searchQuery = this.makeSearchQuery(this.state.query);
|
||||
results = await SearchEngine.instance().search(searchQuery, { fuzzy: this.fuzzy_ });
|
||||
results = await SearchEngine.instance().search(searchQuery);
|
||||
|
||||
resultsInBody = !!results.find((row: any) => row.fields.includes('body'));
|
||||
|
||||
|
|
|
@ -732,22 +732,6 @@ export default class BaseApplication {
|
|||
this.database_.setLogExcludedQueryTypes(['SELECT']);
|
||||
this.database_.setLogger(globalLogger);
|
||||
|
||||
// if (Setting.value('env') === 'dev') {
|
||||
// if (shim.isElectron()) {
|
||||
// this.database_.extensionToLoad = './lib/sql-extensions/spellfix';
|
||||
// }
|
||||
// } else {
|
||||
// if (shim.isElectron()) {
|
||||
// if (shim.isWindows()) {
|
||||
// const appDir = process.execPath.substring(0, process.execPath.lastIndexOf('\\'));
|
||||
// this.database_.extensionToLoad = `${appDir}/usr/lib/spellfix`;
|
||||
// } else {
|
||||
// const appDir = process.execPath.substring(0, process.execPath.lastIndexOf('/'));
|
||||
// this.database_.extensionToLoad = `${appDir}/usr/lib/spellfix`;
|
||||
// }
|
||||
// }
|
||||
// }
|
||||
|
||||
await this.database_.open({ name: `${profileDir}/database.sqlite` });
|
||||
|
||||
// if (Setting.value('env') === 'dev') await this.database_.clearForTesting();
|
||||
|
@ -774,19 +758,6 @@ export default class BaseApplication {
|
|||
setLocale(Setting.value('locale'));
|
||||
}
|
||||
|
||||
// if (Setting.value('db.fuzzySearchEnabled') === -1) {
|
||||
// const fuzzySearchEnabled = await this.database_.fuzzySearchEnabled();
|
||||
// Setting.setValue('db.fuzzySearchEnabled', fuzzySearchEnabled ? 1 : 0);
|
||||
// }
|
||||
|
||||
// // Always disable on CLI because building and packaging the extension is not working
|
||||
// // and is too error-prone - requires gcc on the machine, or we should package the .so
|
||||
// // and dylib files, but it's not sure it would work everywhere if not built from
|
||||
// // source on the target machine.
|
||||
// if (Setting.value('appType') !== 'desktop') {
|
||||
// Setting.setValue('db.fuzzySearchEnabled', 0);
|
||||
// }
|
||||
|
||||
// For now always disable fuzzy search due to performance issues:
|
||||
// https://discourse.joplinapp.org/t/1-1-4-keyboard-locks-up-while-typing/11231/11
|
||||
// https://discourse.joplinapp.org/t/serious-lagging-when-there-are-tens-of-thousands-of-notes/11215/23
|
||||
|
|
|
@ -140,8 +140,6 @@ export default class JoplinDatabase extends Database {
|
|||
|
||||
constructor(driver: any) {
|
||||
super(driver);
|
||||
|
||||
// this.extensionToLoad = './build/lib/sql-extensions/spellfix';
|
||||
}
|
||||
|
||||
initialized() {
|
||||
|
@ -933,15 +931,6 @@ export default class JoplinDatabase extends Database {
|
|||
async initialize() {
|
||||
this.logger().info('Checking for database schema update...');
|
||||
|
||||
// try {
|
||||
// // Note that the only extension that can be loaded as of now is spellfix.
|
||||
// // If it fails here, it will fail on the fuzzySearchEnabled() check above
|
||||
// // too, thus disabling spellfix for the app.
|
||||
// await this.loadExtension(this.extensionToLoad);
|
||||
// } catch (error) {
|
||||
// this.logger().error(error);
|
||||
// }
|
||||
|
||||
let versionRow = null;
|
||||
try {
|
||||
// Will throw if the database has not been created yet, but this is handled below
|
||||
|
|
|
@ -18,7 +18,6 @@ export default class SearchEngine {
|
|||
public static SEARCH_TYPE_AUTO = 'auto';
|
||||
public static SEARCH_TYPE_BASIC = 'basic';
|
||||
public static SEARCH_TYPE_FTS = 'fts';
|
||||
public static SEARCH_TYPE_FTS_FUZZY = 'fts_fuzzy';
|
||||
|
||||
public dispatch: Function = (_o: any) => {};
|
||||
private logger_ = new Logger();
|
||||
|
@ -88,11 +87,6 @@ export default class SearchEngine {
|
|||
);
|
||||
}
|
||||
|
||||
if (!noteIds.length && (Setting.value('db.fuzzySearchEnabled') === 1)) {
|
||||
// On the last loop
|
||||
queries.push({ sql: 'INSERT INTO notes_spellfix(word,rank) SELECT term, documents FROM search_aux WHERE col=\'*\'' });
|
||||
}
|
||||
|
||||
await this.db().transactionExecBatch(queries);
|
||||
}
|
||||
|
||||
|
@ -157,16 +151,9 @@ export default class SearchEngine {
|
|||
[BaseModel.TYPE_NOTE, lastChangeId]
|
||||
);
|
||||
|
||||
const queries = [];
|
||||
if (!changes.length) break;
|
||||
|
||||
if (!changes.length) {
|
||||
if (Setting.value('db.fuzzySearchEnabled') === 1) {
|
||||
queries.push({ sql: 'DELETE FROM notes_spellfix' });
|
||||
queries.push({ sql: 'INSERT INTO notes_spellfix(word,rank) SELECT term, documents FROM search_aux WHERE col=\'*\'' });
|
||||
await this.db().transactionExecBatch(queries);
|
||||
}
|
||||
break;
|
||||
}
|
||||
const queries = [];
|
||||
|
||||
const noteIds = changes.map(a => a.item_id);
|
||||
const notes = await Note.modelSelectAll(`
|
||||
|
@ -273,7 +260,7 @@ export default class SearchEngine {
|
|||
|
||||
|
||||
|
||||
calculateWeightBM25_(rows: any[], fuzzyScore: any) {
|
||||
calculateWeightBM25_(rows: any[]) {
|
||||
// https://www.sqlite.org/fts3.html#matchinfo
|
||||
// pcnalx are the arguments passed to matchinfo
|
||||
// p - The number of matchable phrases in the query.
|
||||
|
@ -352,20 +339,14 @@ export default class SearchEngine {
|
|||
for (let i = 0; i < rows.length; i++) {
|
||||
const row = rows[i];
|
||||
row.weight = 0;
|
||||
row.fuzziness = 1000;
|
||||
row.wordFound = [];
|
||||
for (let j = 0; j < numPhrases; j++) {
|
||||
let found = false;
|
||||
columns.forEach(column => {
|
||||
const rowsWithHits = docsWithHits(X[i], column, j);
|
||||
const frequencyHits = hitsThisRow(X[i], column, j);
|
||||
const idf = IDF(rowsWithHits, numRows);
|
||||
found = found ? found : (frequencyHits > 0);
|
||||
|
||||
row.weight += BM25(idf, frequencyHits, numTokens[column][i], avgTokens[column]);
|
||||
row.fuzziness = (frequencyHits > 0) ? Math.min(row.fuzziness, fuzzyScore[j]) : row.fuzziness;
|
||||
});
|
||||
row.wordFound.push(found);
|
||||
}
|
||||
|
||||
row.weight += weightForDaysSinceLastUpdate(row);
|
||||
|
@ -392,35 +373,18 @@ export default class SearchEngine {
|
|||
}
|
||||
|
||||
processResults_(rows: any[], parsedQuery: any, isBasicSearchResults = false) {
|
||||
const rowContainsAllWords = (wordsFound: any, numFuzzyMatches: any) => {
|
||||
let start = 0;
|
||||
let end = 0;
|
||||
for (let i = 0; i < numFuzzyMatches.length; i++) {
|
||||
end = end + numFuzzyMatches[i];
|
||||
if (!(wordsFound.slice(start, end).find((x: any) => x))) {
|
||||
// This note doesn't contain any fuzzy matches for the word
|
||||
return false;
|
||||
}
|
||||
start = end;
|
||||
}
|
||||
return true;
|
||||
};
|
||||
|
||||
if (isBasicSearchResults) {
|
||||
this.processBasicSearchResults_(rows, parsedQuery);
|
||||
} else {
|
||||
this.calculateWeightBM25_(rows, parsedQuery.fuzzyScore);
|
||||
this.calculateWeightBM25_(rows);
|
||||
for (let i = 0; i < rows.length; i++) {
|
||||
const row = rows[i];
|
||||
row.include = (parsedQuery.fuzzy && !parsedQuery.any) ? rowContainsAllWords(row.wordFound, parsedQuery.numFuzzyMatches) : true;
|
||||
const offsets = row.offsets.split(' ').map((o: any) => Number(o));
|
||||
row.fields = this.fieldNamesFromOffsets_(offsets);
|
||||
}
|
||||
}
|
||||
|
||||
rows.sort((a, b) => {
|
||||
if (a.fuzziness < b.fuzziness) return -1;
|
||||
if (a.fuzziness > b.fuzziness) return +1;
|
||||
if (a.fields.includes('title') && !b.fields.includes('title')) return -1;
|
||||
if (!a.fields.includes('title') && b.fields.includes('title')) return +1;
|
||||
if (a.weight < b.weight) return +1;
|
||||
|
@ -448,22 +412,11 @@ export default class SearchEngine {
|
|||
return regexString;
|
||||
}
|
||||
|
||||
async fuzzifier(words: string[]) {
|
||||
const fuzzyMatches: any[] = [];
|
||||
words.forEach(word => {
|
||||
const fuzzyWords = this.db().selectAll('SELECT word, score FROM notes_spellfix WHERE word MATCH ? AND top=3', [word]);
|
||||
fuzzyMatches.push(fuzzyWords);
|
||||
});
|
||||
return await Promise.all(fuzzyMatches);
|
||||
}
|
||||
|
||||
async parseQuery(query: string, fuzzy: any = null) {
|
||||
if (fuzzy === null) fuzzy = Setting.value('db.fuzzySearchEnabled') === 1;
|
||||
async parseQuery(query: string) {
|
||||
|
||||
const trimQuotes = (str: string) => str.startsWith('"') ? str.substr(1, str.length - 2) : str;
|
||||
|
||||
let allTerms: any[] = [];
|
||||
let allFuzzyTerms = [];
|
||||
|
||||
try {
|
||||
allTerms = filterParser(query);
|
||||
|
@ -471,81 +424,11 @@ export default class SearchEngine {
|
|||
console.warn(error);
|
||||
}
|
||||
|
||||
const textTerms = allTerms.filter(x => x.name === 'text' && !x.negated);
|
||||
const titleTerms = allTerms.filter(x => x.name === 'title' && !x.negated);
|
||||
const bodyTerms = allTerms.filter(x => x.name === 'body' && !x.negated);
|
||||
const textTerms = allTerms.filter(x => x.name === 'text' && !x.negated).map(x => trimQuotes(x.value));
|
||||
const titleTerms = allTerms.filter(x => x.name === 'title' && !x.negated).map(x => trimQuotes(x.value));
|
||||
const bodyTerms = allTerms.filter(x => x.name === 'body' && !x.negated).map(x => trimQuotes(x.value));
|
||||
|
||||
const fuzzyScore = [];
|
||||
let numFuzzyMatches = [];
|
||||
let terms: any = null;
|
||||
|
||||
if (fuzzy) {
|
||||
const fuzzyText = await this.fuzzifier(textTerms.filter(x => !(x.quoted || x.wildcard)).map(x => trimQuotes(x.value)));
|
||||
const fuzzyTitle = await this.fuzzifier(titleTerms.filter(x => !x.wildcard).map(x => trimQuotes(x.value)));
|
||||
const fuzzyBody = await this.fuzzifier(bodyTerms.filter(x => !x.wildcard).map(x => trimQuotes(x.value)));
|
||||
|
||||
// Floor the fuzzy scores to 0, 1 and 2.
|
||||
const floorFuzzyScore = (matches: any) => {
|
||||
for (let i = 0; i < matches.length; i++) matches[i].score = i;
|
||||
};
|
||||
|
||||
fuzzyText.forEach(floorFuzzyScore);
|
||||
fuzzyTitle.forEach(floorFuzzyScore);
|
||||
fuzzyBody.forEach(floorFuzzyScore);
|
||||
|
||||
const phraseTextSearch = textTerms.filter(x => x.quoted);
|
||||
const wildCardSearch = textTerms.concat(titleTerms).concat(bodyTerms).filter(x => x.wildcard);
|
||||
|
||||
// Save number of fuzzy matches we got for each word
|
||||
// fuzzifier() is currently set to return at most 3 matches
|
||||
// We need to know which fuzzy words go together so that we can filter out notes that don't contain a required word.
|
||||
numFuzzyMatches = fuzzyText.concat(fuzzyTitle).concat(fuzzyBody).map(x => x.length);
|
||||
for (let i = 0; i < phraseTextSearch.length + wildCardSearch.length; i++) {
|
||||
// Phrase searches and wildcard searches are preserved without fuzzification (A single match)
|
||||
numFuzzyMatches.push(1);
|
||||
}
|
||||
|
||||
const mergedFuzzyText = [].concat.apply([], fuzzyText);
|
||||
const mergedFuzzyTitle = [].concat.apply([], fuzzyTitle);
|
||||
const mergedFuzzyBody = [].concat.apply([], fuzzyBody);
|
||||
|
||||
const fuzzyTextTerms = mergedFuzzyText.map(x => { return { name: 'text', value: x.word, negated: false, score: x.score }; });
|
||||
const fuzzyTitleTerms = mergedFuzzyTitle.map(x => { return { name: 'title', value: x.word, negated: false, score: x.score }; });
|
||||
const fuzzyBodyTerms = mergedFuzzyBody.map(x => { return { name: 'body', value: x.word, negated: false, score: x.score }; });
|
||||
|
||||
// Remove previous text, title and body and replace with fuzzy versions
|
||||
allTerms = allTerms.filter(x => (x.name !== 'text' && x.name !== 'title' && x.name !== 'body'));
|
||||
|
||||
// The order matters here!
|
||||
// The text goes first, then title, then body, then phrase and finally wildcard
|
||||
// This is because it needs to match with numFuzzyMathches.
|
||||
allFuzzyTerms = allTerms.concat(fuzzyTextTerms).concat(fuzzyTitleTerms).concat(fuzzyBodyTerms).concat(phraseTextSearch).concat(wildCardSearch);
|
||||
|
||||
const allTextTerms = allFuzzyTerms.filter(x => x.name === 'title' || x.name === 'body' || x.name === 'text');
|
||||
for (let i = 0; i < allTextTerms.length; i++) {
|
||||
// Phrase searches and wildcard searches will get a fuzziness score of zero.
|
||||
// This means that they will go first in the sort order (Even if there are other words with matches in the title)
|
||||
// Undesirable?
|
||||
fuzzyScore.push(allFuzzyTerms[i].score ? allFuzzyTerms[i].score : 0);
|
||||
}
|
||||
|
||||
const wildCardTextTerms = wildCardSearch.filter(x => x.name === 'text').map(x =>trimQuotes(x.value));
|
||||
const wildCardTitleTerms = wildCardSearch.filter(x => x.name === 'title').map(x =>trimQuotes(x.value));
|
||||
const wildCardBodyTerms = wildCardSearch.filter(x => x.name === 'body').map(x =>trimQuotes(x.value));
|
||||
const phraseTextTerms = phraseTextSearch.map(x => trimQuotes(x.value));
|
||||
|
||||
terms = {
|
||||
_: fuzzyTextTerms.map(x => trimQuotes(x.value)).concat(phraseTextTerms).concat(wildCardTextTerms),
|
||||
title: fuzzyTitleTerms.map(x => trimQuotes(x.value)).concat(wildCardTitleTerms),
|
||||
body: fuzzyBodyTerms.map(x => trimQuotes(x.value)).concat(wildCardBodyTerms),
|
||||
};
|
||||
} else {
|
||||
const nonNegatedTextTerms = textTerms.length + titleTerms.length + bodyTerms.length;
|
||||
for (let i = 0; i < nonNegatedTextTerms; i++) {
|
||||
fuzzyScore.push(0);
|
||||
}
|
||||
terms = { _: textTerms.map(x =>trimQuotes(x.value)), 'title': titleTerms.map(x =>trimQuotes(x.value)), 'body': bodyTerms.map(x =>trimQuotes(x.value)) };
|
||||
}
|
||||
const terms: any = { _: textTerms, 'title': titleTerms, 'body': bodyTerms };
|
||||
|
||||
// Filter terms:
|
||||
// - Convert wildcards to regex
|
||||
|
@ -603,10 +486,7 @@ export default class SearchEngine {
|
|||
termCount: termCount,
|
||||
keys: keys,
|
||||
terms: terms, // text terms
|
||||
allTerms: fuzzy ? allFuzzyTerms : allTerms,
|
||||
fuzzyScore: fuzzyScore,
|
||||
numFuzzyMatches: numFuzzyMatches,
|
||||
fuzzy: fuzzy,
|
||||
allTerms: allTerms,
|
||||
any: !!allTerms.find(term => term.name === 'any'),
|
||||
};
|
||||
}
|
||||
|
@ -651,8 +531,8 @@ export default class SearchEngine {
|
|||
return Note.previews(null, searchOptions);
|
||||
}
|
||||
|
||||
determineSearchType_(query: string, options: any) {
|
||||
if (options.searchType === SearchEngine.SEARCH_TYPE_BASIC) return SearchEngine.SEARCH_TYPE_BASIC;
|
||||
determineSearchType_(query: string, preferredSearchType: any) {
|
||||
if (preferredSearchType === SearchEngine.SEARCH_TYPE_BASIC) return SearchEngine.SEARCH_TYPE_BASIC;
|
||||
|
||||
// If preferredSearchType is "fts" we auto-detect anyway
|
||||
// because it's not always supported.
|
||||
|
@ -669,12 +549,9 @@ export default class SearchEngine {
|
|||
|
||||
if (!Setting.value('db.ftsEnabled') || ['ja', 'zh', 'ko', 'th'].indexOf(st) >= 0) {
|
||||
return SearchEngine.SEARCH_TYPE_BASIC;
|
||||
} else if (options.fuzzy) {
|
||||
return SearchEngine.SEARCH_TYPE_FTS_FUZZY;
|
||||
} else {
|
||||
return SearchEngine.SEARCH_TYPE_FTS;
|
||||
}
|
||||
|
||||
return SearchEngine.SEARCH_TYPE_FTS;
|
||||
}
|
||||
|
||||
async search(searchString: string, options: any = null) {
|
||||
|
@ -682,35 +559,30 @@ export default class SearchEngine {
|
|||
|
||||
options = Object.assign({}, {
|
||||
searchType: SearchEngine.SEARCH_TYPE_AUTO,
|
||||
fuzzy: Setting.value('db.fuzzySearchEnabled') === 1,
|
||||
}, options);
|
||||
|
||||
const searchType = this.determineSearchType_(searchString, options);
|
||||
const searchType = this.determineSearchType_(searchString, options.searchType);
|
||||
const parsedQuery = await this.parseQuery(searchString);
|
||||
|
||||
if (searchType === SearchEngine.SEARCH_TYPE_BASIC) {
|
||||
// Non-alphabetical languages aren't support by SQLite FTS (except with extensions which are not available in all platforms)
|
||||
searchString = this.normalizeText_(searchString);
|
||||
const rows = await this.basicSearch(searchString);
|
||||
const parsedQuery = await this.parseQuery(searchString);
|
||||
|
||||
this.processResults_(rows, parsedQuery, true);
|
||||
return rows;
|
||||
} else {
|
||||
// SEARCH_TYPE_FTS or SEARCH_TYPE_FTS_FUZZY
|
||||
// SEARCH_TYPE_FTS
|
||||
// FTS will ignore all special characters, like "-" in the index. So if
|
||||
// we search for "this-phrase" it won't find it because it will only
|
||||
// see "this phrase" in the index. Because of this, we remove the dashes
|
||||
// when searching.
|
||||
// https://github.com/laurent22/joplin/issues/1075#issuecomment-459258856
|
||||
|
||||
const parsedQuery = await this.parseQuery(searchString, searchType === SearchEngine.SEARCH_TYPE_FTS_FUZZY);
|
||||
|
||||
try {
|
||||
const { query, params } = queryBuilder(parsedQuery.allTerms, searchType === SearchEngine.SEARCH_TYPE_FTS_FUZZY);
|
||||
const { query, params } = queryBuilder(parsedQuery.allTerms);
|
||||
const rows = await this.db().selectAll(query, params);
|
||||
this.processResults_(rows, parsedQuery);
|
||||
if (searchType === SearchEngine.SEARCH_TYPE_FTS_FUZZY && !parsedQuery.any) {
|
||||
return rows.filter((row: any) => row.include);
|
||||
}
|
||||
return rows;
|
||||
} catch (error) {
|
||||
this.logger().warn(`Cannot execute MATCH query: ${searchString}: ${error.message}`);
|
||||
|
|
|
@ -300,7 +300,7 @@ const sourceUrlFilter = (terms: Term[], conditons: string[], params: string[], r
|
|||
};
|
||||
|
||||
|
||||
const textFilter = (terms: Term[], conditions: string[], params: string[], relation: Relation, fuzzy: Boolean) => {
|
||||
const textFilter = (terms: Term[], conditions: string[], params: string[], relation: Relation) => {
|
||||
const addExcludeTextConditions = (excludedTerms: Term[], conditions: string[], params: string[], relation: Relation) => {
|
||||
const type = excludedTerms[0].name === 'text' ? '' : `.${excludedTerms[0].name}`;
|
||||
|
||||
|
@ -342,7 +342,7 @@ const textFilter = (terms: Term[], conditions: string[], params: string[], relat
|
|||
if (term.name === 'text') return term.value;
|
||||
else return `${term.name}:${term.value}`;
|
||||
});
|
||||
const matchQuery = (fuzzy || (relation === 'OR')) ? termsToMatch.join(' OR ') : termsToMatch.join(' ');
|
||||
const matchQuery = (relation === 'OR') ? termsToMatch.join(' OR ') : termsToMatch.join(' ');
|
||||
params.push(matchQuery);
|
||||
}
|
||||
|
||||
|
@ -374,7 +374,7 @@ const getConnective = (terms: Term[], relation: Relation): string => {
|
|||
return (!notebookTerm && (relation === 'OR')) ? 'ROWID=-1' : '1'; // ROWID=-1 acts as 0 (something always false)
|
||||
};
|
||||
|
||||
export default function queryBuilder(terms: Term[], fuzzy: boolean) {
|
||||
export default function queryBuilder(terms: Term[]) {
|
||||
const queryParts: string[] = [];
|
||||
const params: string[] = [];
|
||||
const withs: string[] = [];
|
||||
|
@ -402,7 +402,7 @@ export default function queryBuilder(terms: Term[], fuzzy: boolean) {
|
|||
|
||||
resourceFilter(terms, queryParts, params, relation, withs);
|
||||
|
||||
textFilter(terms, queryParts, params, relation, fuzzy);
|
||||
textFilter(terms, queryParts, params, relation);
|
||||
|
||||
|
||||
typeFilter(terms, queryParts, params, relation);
|
||||
|
|
Loading…
Reference in New Issue