Chore: Clean up unused fuzzy search code (#4593)

pull/4657/head
Naveen M V 2021-03-11 03:57:45 +05:30 committed by GitHub
parent ec99511397
commit 3657c0369e
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 24 additions and 195 deletions

View File

@ -67,7 +67,6 @@ class GotoAnything {
class Dialog extends React.PureComponent<Props, State> {
private fuzzy_: boolean;
private styles_: any;
private inputRef: any;
private itemListRef: any;
@ -77,8 +76,6 @@ class Dialog extends React.PureComponent<Props, State> {
constructor(props: Props) {
super(props);
this.fuzzy_ = false;
const startString = props?.userData?.startString ? props?.userData?.startString : '';
this.state = {
@ -242,7 +239,7 @@ class Dialog extends React.PureComponent<Props, State> {
}
async keywords(searchQuery: string) {
const parsedQuery = await SearchEngine.instance().parseQuery(searchQuery, this.fuzzy_);
const parsedQuery = await SearchEngine.instance().parseQuery(searchQuery);
return SearchEngine.instance().allParsedQueryTerms(parsedQuery);
}
@ -296,7 +293,7 @@ class Dialog extends React.PureComponent<Props, State> {
} else { // Note TITLE or BODY
listType = BaseModel.TYPE_NOTE;
searchQuery = this.makeSearchQuery(this.state.query);
results = await SearchEngine.instance().search(searchQuery, { fuzzy: this.fuzzy_ });
results = await SearchEngine.instance().search(searchQuery);
resultsInBody = !!results.find((row: any) => row.fields.includes('body'));

View File

@ -732,22 +732,6 @@ export default class BaseApplication {
this.database_.setLogExcludedQueryTypes(['SELECT']);
this.database_.setLogger(globalLogger);
// if (Setting.value('env') === 'dev') {
// if (shim.isElectron()) {
// this.database_.extensionToLoad = './lib/sql-extensions/spellfix';
// }
// } else {
// if (shim.isElectron()) {
// if (shim.isWindows()) {
// const appDir = process.execPath.substring(0, process.execPath.lastIndexOf('\\'));
// this.database_.extensionToLoad = `${appDir}/usr/lib/spellfix`;
// } else {
// const appDir = process.execPath.substring(0, process.execPath.lastIndexOf('/'));
// this.database_.extensionToLoad = `${appDir}/usr/lib/spellfix`;
// }
// }
// }
await this.database_.open({ name: `${profileDir}/database.sqlite` });
// if (Setting.value('env') === 'dev') await this.database_.clearForTesting();
@ -774,19 +758,6 @@ export default class BaseApplication {
setLocale(Setting.value('locale'));
}
// if (Setting.value('db.fuzzySearchEnabled') === -1) {
// const fuzzySearchEnabled = await this.database_.fuzzySearchEnabled();
// Setting.setValue('db.fuzzySearchEnabled', fuzzySearchEnabled ? 1 : 0);
// }
// // Always disable on CLI because building and packaging the extension is not working
// // and is too error-prone - requires gcc on the machine, or we should package the .so
// // and dylib files, but it's not sure it would work everywhere if not built from
// // source on the target machine.
// if (Setting.value('appType') !== 'desktop') {
// Setting.setValue('db.fuzzySearchEnabled', 0);
// }
// For now always disable fuzzy search due to performance issues:
// https://discourse.joplinapp.org/t/1-1-4-keyboard-locks-up-while-typing/11231/11
// https://discourse.joplinapp.org/t/serious-lagging-when-there-are-tens-of-thousands-of-notes/11215/23

View File

@ -140,8 +140,6 @@ export default class JoplinDatabase extends Database {
constructor(driver: any) {
super(driver);
// this.extensionToLoad = './build/lib/sql-extensions/spellfix';
}
initialized() {
@ -933,15 +931,6 @@ export default class JoplinDatabase extends Database {
async initialize() {
this.logger().info('Checking for database schema update...');
// try {
// // Note that the only extension that can be loaded as of now is spellfix.
// // If it fails here, it will fail on the fuzzySearchEnabled() check above
// // too, thus disabling spellfix for the app.
// await this.loadExtension(this.extensionToLoad);
// } catch (error) {
// this.logger().error(error);
// }
let versionRow = null;
try {
// Will throw if the database has not been created yet, but this is handled below

View File

@ -18,7 +18,6 @@ export default class SearchEngine {
public static SEARCH_TYPE_AUTO = 'auto';
public static SEARCH_TYPE_BASIC = 'basic';
public static SEARCH_TYPE_FTS = 'fts';
public static SEARCH_TYPE_FTS_FUZZY = 'fts_fuzzy';
public dispatch: Function = (_o: any) => {};
private logger_ = new Logger();
@ -88,11 +87,6 @@ export default class SearchEngine {
);
}
if (!noteIds.length && (Setting.value('db.fuzzySearchEnabled') === 1)) {
// On the last loop
queries.push({ sql: 'INSERT INTO notes_spellfix(word,rank) SELECT term, documents FROM search_aux WHERE col=\'*\'' });
}
await this.db().transactionExecBatch(queries);
}
@ -157,16 +151,9 @@ export default class SearchEngine {
[BaseModel.TYPE_NOTE, lastChangeId]
);
const queries = [];
if (!changes.length) break;
if (!changes.length) {
if (Setting.value('db.fuzzySearchEnabled') === 1) {
queries.push({ sql: 'DELETE FROM notes_spellfix' });
queries.push({ sql: 'INSERT INTO notes_spellfix(word,rank) SELECT term, documents FROM search_aux WHERE col=\'*\'' });
await this.db().transactionExecBatch(queries);
}
break;
}
const queries = [];
const noteIds = changes.map(a => a.item_id);
const notes = await Note.modelSelectAll(`
@ -273,7 +260,7 @@ export default class SearchEngine {
calculateWeightBM25_(rows: any[], fuzzyScore: any) {
calculateWeightBM25_(rows: any[]) {
// https://www.sqlite.org/fts3.html#matchinfo
// pcnalx are the arguments passed to matchinfo
// p - The number of matchable phrases in the query.
@ -352,20 +339,14 @@ export default class SearchEngine {
for (let i = 0; i < rows.length; i++) {
const row = rows[i];
row.weight = 0;
row.fuzziness = 1000;
row.wordFound = [];
for (let j = 0; j < numPhrases; j++) {
let found = false;
columns.forEach(column => {
const rowsWithHits = docsWithHits(X[i], column, j);
const frequencyHits = hitsThisRow(X[i], column, j);
const idf = IDF(rowsWithHits, numRows);
found = found ? found : (frequencyHits > 0);
row.weight += BM25(idf, frequencyHits, numTokens[column][i], avgTokens[column]);
row.fuzziness = (frequencyHits > 0) ? Math.min(row.fuzziness, fuzzyScore[j]) : row.fuzziness;
});
row.wordFound.push(found);
}
row.weight += weightForDaysSinceLastUpdate(row);
@ -392,35 +373,18 @@ export default class SearchEngine {
}
processResults_(rows: any[], parsedQuery: any, isBasicSearchResults = false) {
const rowContainsAllWords = (wordsFound: any, numFuzzyMatches: any) => {
let start = 0;
let end = 0;
for (let i = 0; i < numFuzzyMatches.length; i++) {
end = end + numFuzzyMatches[i];
if (!(wordsFound.slice(start, end).find((x: any) => x))) {
// This note doesn't contain any fuzzy matches for the word
return false;
}
start = end;
}
return true;
};
if (isBasicSearchResults) {
this.processBasicSearchResults_(rows, parsedQuery);
} else {
this.calculateWeightBM25_(rows, parsedQuery.fuzzyScore);
this.calculateWeightBM25_(rows);
for (let i = 0; i < rows.length; i++) {
const row = rows[i];
row.include = (parsedQuery.fuzzy && !parsedQuery.any) ? rowContainsAllWords(row.wordFound, parsedQuery.numFuzzyMatches) : true;
const offsets = row.offsets.split(' ').map((o: any) => Number(o));
row.fields = this.fieldNamesFromOffsets_(offsets);
}
}
rows.sort((a, b) => {
if (a.fuzziness < b.fuzziness) return -1;
if (a.fuzziness > b.fuzziness) return +1;
if (a.fields.includes('title') && !b.fields.includes('title')) return -1;
if (!a.fields.includes('title') && b.fields.includes('title')) return +1;
if (a.weight < b.weight) return +1;
@ -448,22 +412,11 @@ export default class SearchEngine {
return regexString;
}
async fuzzifier(words: string[]) {
const fuzzyMatches: any[] = [];
words.forEach(word => {
const fuzzyWords = this.db().selectAll('SELECT word, score FROM notes_spellfix WHERE word MATCH ? AND top=3', [word]);
fuzzyMatches.push(fuzzyWords);
});
return await Promise.all(fuzzyMatches);
}
async parseQuery(query: string, fuzzy: any = null) {
if (fuzzy === null) fuzzy = Setting.value('db.fuzzySearchEnabled') === 1;
async parseQuery(query: string) {
const trimQuotes = (str: string) => str.startsWith('"') ? str.substr(1, str.length - 2) : str;
let allTerms: any[] = [];
let allFuzzyTerms = [];
try {
allTerms = filterParser(query);
@ -471,81 +424,11 @@ export default class SearchEngine {
console.warn(error);
}
const textTerms = allTerms.filter(x => x.name === 'text' && !x.negated);
const titleTerms = allTerms.filter(x => x.name === 'title' && !x.negated);
const bodyTerms = allTerms.filter(x => x.name === 'body' && !x.negated);
const textTerms = allTerms.filter(x => x.name === 'text' && !x.negated).map(x => trimQuotes(x.value));
const titleTerms = allTerms.filter(x => x.name === 'title' && !x.negated).map(x => trimQuotes(x.value));
const bodyTerms = allTerms.filter(x => x.name === 'body' && !x.negated).map(x => trimQuotes(x.value));
const fuzzyScore = [];
let numFuzzyMatches = [];
let terms: any = null;
if (fuzzy) {
const fuzzyText = await this.fuzzifier(textTerms.filter(x => !(x.quoted || x.wildcard)).map(x => trimQuotes(x.value)));
const fuzzyTitle = await this.fuzzifier(titleTerms.filter(x => !x.wildcard).map(x => trimQuotes(x.value)));
const fuzzyBody = await this.fuzzifier(bodyTerms.filter(x => !x.wildcard).map(x => trimQuotes(x.value)));
// Floor the fuzzy scores to 0, 1 and 2.
const floorFuzzyScore = (matches: any) => {
for (let i = 0; i < matches.length; i++) matches[i].score = i;
};
fuzzyText.forEach(floorFuzzyScore);
fuzzyTitle.forEach(floorFuzzyScore);
fuzzyBody.forEach(floorFuzzyScore);
const phraseTextSearch = textTerms.filter(x => x.quoted);
const wildCardSearch = textTerms.concat(titleTerms).concat(bodyTerms).filter(x => x.wildcard);
// Save number of fuzzy matches we got for each word
// fuzzifier() is currently set to return at most 3 matches
// We need to know which fuzzy words go together so that we can filter out notes that don't contain a required word.
numFuzzyMatches = fuzzyText.concat(fuzzyTitle).concat(fuzzyBody).map(x => x.length);
for (let i = 0; i < phraseTextSearch.length + wildCardSearch.length; i++) {
// Phrase searches and wildcard searches are preserved without fuzzification (A single match)
numFuzzyMatches.push(1);
}
const mergedFuzzyText = [].concat.apply([], fuzzyText);
const mergedFuzzyTitle = [].concat.apply([], fuzzyTitle);
const mergedFuzzyBody = [].concat.apply([], fuzzyBody);
const fuzzyTextTerms = mergedFuzzyText.map(x => { return { name: 'text', value: x.word, negated: false, score: x.score }; });
const fuzzyTitleTerms = mergedFuzzyTitle.map(x => { return { name: 'title', value: x.word, negated: false, score: x.score }; });
const fuzzyBodyTerms = mergedFuzzyBody.map(x => { return { name: 'body', value: x.word, negated: false, score: x.score }; });
// Remove previous text, title and body and replace with fuzzy versions
allTerms = allTerms.filter(x => (x.name !== 'text' && x.name !== 'title' && x.name !== 'body'));
// The order matters here!
// The text goes first, then title, then body, then phrase and finally wildcard
// This is because it needs to match with numFuzzyMatches.
allFuzzyTerms = allTerms.concat(fuzzyTextTerms).concat(fuzzyTitleTerms).concat(fuzzyBodyTerms).concat(phraseTextSearch).concat(wildCardSearch);
const allTextTerms = allFuzzyTerms.filter(x => x.name === 'title' || x.name === 'body' || x.name === 'text');
for (let i = 0; i < allTextTerms.length; i++) {
// Phrase searches and wildcard searches will get a fuzziness score of zero.
// This means that they will go first in the sort order (Even if there are other words with matches in the title)
// Undesirable?
fuzzyScore.push(allFuzzyTerms[i].score ? allFuzzyTerms[i].score : 0);
}
const wildCardTextTerms = wildCardSearch.filter(x => x.name === 'text').map(x =>trimQuotes(x.value));
const wildCardTitleTerms = wildCardSearch.filter(x => x.name === 'title').map(x =>trimQuotes(x.value));
const wildCardBodyTerms = wildCardSearch.filter(x => x.name === 'body').map(x =>trimQuotes(x.value));
const phraseTextTerms = phraseTextSearch.map(x => trimQuotes(x.value));
terms = {
_: fuzzyTextTerms.map(x => trimQuotes(x.value)).concat(phraseTextTerms).concat(wildCardTextTerms),
title: fuzzyTitleTerms.map(x => trimQuotes(x.value)).concat(wildCardTitleTerms),
body: fuzzyBodyTerms.map(x => trimQuotes(x.value)).concat(wildCardBodyTerms),
};
} else {
const nonNegatedTextTerms = textTerms.length + titleTerms.length + bodyTerms.length;
for (let i = 0; i < nonNegatedTextTerms; i++) {
fuzzyScore.push(0);
}
terms = { _: textTerms.map(x =>trimQuotes(x.value)), 'title': titleTerms.map(x =>trimQuotes(x.value)), 'body': bodyTerms.map(x =>trimQuotes(x.value)) };
}
const terms: any = { _: textTerms, 'title': titleTerms, 'body': bodyTerms };
// Filter terms:
// - Convert wildcards to regex
@ -603,10 +486,7 @@ export default class SearchEngine {
termCount: termCount,
keys: keys,
terms: terms, // text terms
allTerms: fuzzy ? allFuzzyTerms : allTerms,
fuzzyScore: fuzzyScore,
numFuzzyMatches: numFuzzyMatches,
fuzzy: fuzzy,
allTerms: allTerms,
any: !!allTerms.find(term => term.name === 'any'),
};
}
@ -651,8 +531,8 @@ export default class SearchEngine {
return Note.previews(null, searchOptions);
}
determineSearchType_(query: string, options: any) {
if (options.searchType === SearchEngine.SEARCH_TYPE_BASIC) return SearchEngine.SEARCH_TYPE_BASIC;
determineSearchType_(query: string, preferredSearchType: any) {
if (preferredSearchType === SearchEngine.SEARCH_TYPE_BASIC) return SearchEngine.SEARCH_TYPE_BASIC;
// If preferredSearchType is "fts" we auto-detect anyway
// because it's not always supported.
@ -669,12 +549,9 @@ export default class SearchEngine {
if (!Setting.value('db.ftsEnabled') || ['ja', 'zh', 'ko', 'th'].indexOf(st) >= 0) {
return SearchEngine.SEARCH_TYPE_BASIC;
} else if (options.fuzzy) {
return SearchEngine.SEARCH_TYPE_FTS_FUZZY;
} else {
return SearchEngine.SEARCH_TYPE_FTS;
}
return SearchEngine.SEARCH_TYPE_FTS;
}
async search(searchString: string, options: any = null) {
@ -682,35 +559,30 @@ export default class SearchEngine {
options = Object.assign({}, {
searchType: SearchEngine.SEARCH_TYPE_AUTO,
fuzzy: Setting.value('db.fuzzySearchEnabled') === 1,
}, options);
const searchType = this.determineSearchType_(searchString, options);
const searchType = this.determineSearchType_(searchString, options.searchType);
const parsedQuery = await this.parseQuery(searchString);
if (searchType === SearchEngine.SEARCH_TYPE_BASIC) {
// Non-alphabetical languages aren't supported by SQLite FTS (except with extensions which are not available on all platforms)
searchString = this.normalizeText_(searchString);
const rows = await this.basicSearch(searchString);
const parsedQuery = await this.parseQuery(searchString);
this.processResults_(rows, parsedQuery, true);
return rows;
} else {
// SEARCH_TYPE_FTS or SEARCH_TYPE_FTS_FUZZY
// SEARCH_TYPE_FTS
// FTS will ignore all special characters, like "-" in the index. So if
// we search for "this-phrase" it won't find it because it will only
// see "this phrase" in the index. Because of this, we remove the dashes
// when searching.
// https://github.com/laurent22/joplin/issues/1075#issuecomment-459258856
const parsedQuery = await this.parseQuery(searchString, searchType === SearchEngine.SEARCH_TYPE_FTS_FUZZY);
try {
const { query, params } = queryBuilder(parsedQuery.allTerms, searchType === SearchEngine.SEARCH_TYPE_FTS_FUZZY);
const { query, params } = queryBuilder(parsedQuery.allTerms);
const rows = await this.db().selectAll(query, params);
this.processResults_(rows, parsedQuery);
if (searchType === SearchEngine.SEARCH_TYPE_FTS_FUZZY && !parsedQuery.any) {
return rows.filter((row: any) => row.include);
}
return rows;
} catch (error) {
this.logger().warn(`Cannot execute MATCH query: ${searchString}: ${error.message}`);

View File

@ -300,7 +300,7 @@ const sourceUrlFilter = (terms: Term[], conditons: string[], params: string[], r
};
const textFilter = (terms: Term[], conditions: string[], params: string[], relation: Relation, fuzzy: Boolean) => {
const textFilter = (terms: Term[], conditions: string[], params: string[], relation: Relation) => {
const addExcludeTextConditions = (excludedTerms: Term[], conditions: string[], params: string[], relation: Relation) => {
const type = excludedTerms[0].name === 'text' ? '' : `.${excludedTerms[0].name}`;
@ -342,7 +342,7 @@ const textFilter = (terms: Term[], conditions: string[], params: string[], relat
if (term.name === 'text') return term.value;
else return `${term.name}:${term.value}`;
});
const matchQuery = (fuzzy || (relation === 'OR')) ? termsToMatch.join(' OR ') : termsToMatch.join(' ');
const matchQuery = (relation === 'OR') ? termsToMatch.join(' OR ') : termsToMatch.join(' ');
params.push(matchQuery);
}
@ -374,7 +374,7 @@ const getConnective = (terms: Term[], relation: Relation): string => {
return (!notebookTerm && (relation === 'OR')) ? 'ROWID=-1' : '1'; // ROWID=-1 acts as 0 (something always false)
};
export default function queryBuilder(terms: Term[], fuzzy: boolean) {
export default function queryBuilder(terms: Term[]) {
const queryParts: string[] = [];
const params: string[] = [];
const withs: string[] = [];
@ -402,7 +402,7 @@ export default function queryBuilder(terms: Term[], fuzzy: boolean) {
resourceFilter(terms, queryParts, params, relation, withs);
textFilter(terms, queryParts, params, relation, fuzzy);
textFilter(terms, queryParts, params, relation);
typeFilter(terms, queryParts, params, relation);