Chore: Clean up unused fuzzy search code (#4593)

2021-03-11 03:57:45 +05:30 · 2021-03-11 03:57:45 +05:30 · 3657c0369e
parent ec99511397
commit 3657c0369e
5 changed files with 24 additions and 195 deletions
--- a/packages/app-desktop/plugins/GotoAnything.tsx
+++ b/packages/app-desktop/plugins/GotoAnything.tsx
@ -67,7 +67,6 @@ class GotoAnything {

 class Dialog extends React.PureComponent<Props, State> {

-	private fuzzy_: boolean;
 	private styles_: any;
 	private inputRef: any;
 	private itemListRef: any;
@ -77,8 +76,6 @@ class Dialog extends React.PureComponent<Props, State> {
 	constructor(props: Props) {
 		super(props);

-		this.fuzzy_ = false;
-
 		const startString = props?.userData?.startString ? props?.userData?.startString : '';

 		this.state = {
@ -242,7 +239,7 @@ class Dialog extends React.PureComponent<Props, State> {
 	}

 	async keywords(searchQuery: string) {
-		const parsedQuery = await SearchEngine.instance().parseQuery(searchQuery, this.fuzzy_);
+		const parsedQuery = await SearchEngine.instance().parseQuery(searchQuery);
 		return SearchEngine.instance().allParsedQueryTerms(parsedQuery);
 	}

@ -296,7 +293,7 @@ class Dialog extends React.PureComponent<Props, State> {
 			} else { // Note TITLE or BODY
 				listType = BaseModel.TYPE_NOTE;
 				searchQuery = this.makeSearchQuery(this.state.query);
-				results = await SearchEngine.instance().search(searchQuery, { fuzzy: this.fuzzy_ });
+				results = await SearchEngine.instance().search(searchQuery);

 				resultsInBody = !!results.find((row: any) => row.fields.includes('body'));

--- a/packages/lib/BaseApplication.ts
+++ b/packages/lib/BaseApplication.ts
@ -732,22 +732,6 @@ export default class BaseApplication {
 		this.database_.setLogExcludedQueryTypes(['SELECT']);
 		this.database_.setLogger(globalLogger);

-		// if (Setting.value('env') === 'dev') {
-		// 	if (shim.isElectron()) {
-		// 		this.database_.extensionToLoad = './lib/sql-extensions/spellfix';
-		// 	}
-		// } else {
-		// 	if (shim.isElectron()) {
-		// 		if (shim.isWindows()) {
-		// 			const appDir = process.execPath.substring(0, process.execPath.lastIndexOf('\\'));
-		// 			this.database_.extensionToLoad = `${appDir}/usr/lib/spellfix`;
-		// 		} else {
-		// 			const appDir = process.execPath.substring(0, process.execPath.lastIndexOf('/'));
-		// 			this.database_.extensionToLoad = `${appDir}/usr/lib/spellfix`;
-		// 		}
-		// 	}
-		// }
-
 		await this.database_.open({ name: `${profileDir}/database.sqlite` });

 		// if (Setting.value('env') === 'dev') await this.database_.clearForTesting();
@ -774,19 +758,6 @@ export default class BaseApplication {
 			setLocale(Setting.value('locale'));
 		}

-		// if (Setting.value('db.fuzzySearchEnabled') === -1) {
-		// 	const fuzzySearchEnabled = await this.database_.fuzzySearchEnabled();
-		// 	Setting.setValue('db.fuzzySearchEnabled', fuzzySearchEnabled ? 1 : 0);
-		// }
-
-		// // Always disable on CLI because building and packaging the extension is not working
-		// // and is too error-prone - requires gcc on the machine, or we should package the .so
-		// // and dylib files, but it's not sure it would work everywhere if not built from
-		// // source on the target machine.
-		// if (Setting.value('appType') !== 'desktop') {
-		// 	Setting.setValue('db.fuzzySearchEnabled', 0);
-		// }
-
 		// For now always disable fuzzy search due to performance issues:
 		// https://discourse.joplinapp.org/t/1-1-4-keyboard-locks-up-while-typing/11231/11
 		// https://discourse.joplinapp.org/t/serious-lagging-when-there-are-tens-of-thousands-of-notes/11215/23
--- a/packages/lib/JoplinDatabase.ts
+++ b/packages/lib/JoplinDatabase.ts
@ -140,8 +140,6 @@ export default class JoplinDatabase extends Database {

 	constructor(driver: any) {
 		super(driver);
-
-		// this.extensionToLoad = './build/lib/sql-extensions/spellfix';
 	}

 	initialized() {
@ -933,15 +931,6 @@ export default class JoplinDatabase extends Database {
 	async initialize() {
 		this.logger().info('Checking for database schema update...');

-		// try {
-		// 	// Note that the only extension that can be loaded as of now is spellfix.
-		// 	// If it fails here, it will fail on the fuzzySearchEnabled() check above
-		// 	// too, thus disabling spellfix for the app.
-		// 	await this.loadExtension(this.extensionToLoad);
-		// } catch (error) {
-		// 	this.logger().error(error);
-		// }
-
 		let versionRow = null;
 		try {
 			// Will throw if the database has not been created yet, but this is handled below
--- a/packages/lib/services/searchengine/SearchEngine.ts
+++ b/packages/lib/services/searchengine/SearchEngine.ts
@ -18,7 +18,6 @@ export default class SearchEngine {
 	public static SEARCH_TYPE_AUTO = 'auto';
 	public static SEARCH_TYPE_BASIC = 'basic';
 	public static SEARCH_TYPE_FTS = 'fts';
-	public static SEARCH_TYPE_FTS_FUZZY = 'fts_fuzzy';

 	public dispatch: Function = (_o: any) => {};
 	private logger_ = new Logger();
@ -88,11 +87,6 @@ export default class SearchEngine {
 				);
 			}

-			if (!noteIds.length && (Setting.value('db.fuzzySearchEnabled') === 1)) {
-				// On the last loop
-				queries.push({ sql: 'INSERT INTO notes_spellfix(word,rank) SELECT term, documents FROM search_aux WHERE col=\'*\'' });
-			}
-
 			await this.db().transactionExecBatch(queries);
 		}

@ -157,16 +151,9 @@ export default class SearchEngine {
 					[BaseModel.TYPE_NOTE, lastChangeId]
 				);

-				const queries = [];
+				if (!changes.length) break;

-				if (!changes.length) {
-					if (Setting.value('db.fuzzySearchEnabled') === 1) {
-						queries.push({ sql: 'DELETE FROM notes_spellfix' });
-						queries.push({ sql: 'INSERT INTO notes_spellfix(word,rank) SELECT term, documents FROM search_aux WHERE col=\'*\'' });
-						await this.db().transactionExecBatch(queries);
-					}
-					break;
-				}
+				const queries = [];

 				const noteIds = changes.map(a => a.item_id);
 				const notes = await Note.modelSelectAll(`
@ -273,7 +260,7 @@ export default class SearchEngine {



-	calculateWeightBM25_(rows: any[], fuzzyScore: any) {
+	calculateWeightBM25_(rows: any[]) {
 		// https://www.sqlite.org/fts3.html#matchinfo
 		// pcnalx are the arguments passed to matchinfo
 		// p - The number of matchable phrases in the query.
@ -352,20 +339,14 @@ export default class SearchEngine {
 		for (let i = 0; i < rows.length; i++) {
 			const row = rows[i];
 			row.weight = 0;
-			row.fuzziness = 1000;
-			row.wordFound = [];
 			for (let j = 0; j < numPhrases; j++) {
-				let found = false;
 				columns.forEach(column => {
 					const rowsWithHits = docsWithHits(X[i], column, j);
 					const frequencyHits = hitsThisRow(X[i], column, j);
 					const idf = IDF(rowsWithHits, numRows);
-					found = found ? found : (frequencyHits > 0);

 					row.weight += BM25(idf, frequencyHits, numTokens[column][i], avgTokens[column]);
-					row.fuzziness = (frequencyHits > 0) ? Math.min(row.fuzziness, fuzzyScore[j]) : row.fuzziness;
 				});
-				row.wordFound.push(found);
 			}

 			row.weight += weightForDaysSinceLastUpdate(row);
@ -392,35 +373,18 @@ export default class SearchEngine {
 	}

 	processResults_(rows: any[], parsedQuery: any, isBasicSearchResults = false) {
-		const rowContainsAllWords = (wordsFound: any, numFuzzyMatches: any) => {
-			let start = 0;
-			let end = 0;
-			for (let i = 0; i < numFuzzyMatches.length; i++) {
-				end = end + numFuzzyMatches[i];
-				if (!(wordsFound.slice(start, end).find((x: any) => x))) {
-					// This note doesn't contain any fuzzy matches for the word
-					return false;
-				}
-				start = end;
-			}
-			return true;
-		};
-
 		if (isBasicSearchResults) {
 			this.processBasicSearchResults_(rows, parsedQuery);
 		} else {
-			this.calculateWeightBM25_(rows, parsedQuery.fuzzyScore);
+			this.calculateWeightBM25_(rows);
 			for (let i = 0; i < rows.length; i++) {
 				const row = rows[i];
-				row.include = (parsedQuery.fuzzy && !parsedQuery.any) ? rowContainsAllWords(row.wordFound, parsedQuery.numFuzzyMatches) : true;
 				const offsets = row.offsets.split(' ').map((o: any) => Number(o));
 				row.fields = this.fieldNamesFromOffsets_(offsets);
 			}
 		}

 		rows.sort((a, b) => {
-			if (a.fuzziness < b.fuzziness) return -1;
-			if (a.fuzziness > b.fuzziness) return +1;
 			if (a.fields.includes('title') && !b.fields.includes('title')) return -1;
 			if (!a.fields.includes('title') && b.fields.includes('title')) return +1;
 			if (a.weight < b.weight) return +1;
@ -448,22 +412,11 @@ export default class SearchEngine {
 		return regexString;
 	}

-	async fuzzifier(words: string[]) {
-		const fuzzyMatches: any[] = [];
-		words.forEach(word => {
-			const fuzzyWords = this.db().selectAll('SELECT word, score FROM notes_spellfix WHERE word MATCH ? AND top=3', [word]);
-			fuzzyMatches.push(fuzzyWords);
-		});
-		return await Promise.all(fuzzyMatches);
-	}
-
-	async parseQuery(query: string, fuzzy: any = null) {
-		if (fuzzy === null) fuzzy = Setting.value('db.fuzzySearchEnabled') === 1;
+	async parseQuery(query: string) {

 		const trimQuotes = (str: string) => str.startsWith('"') ? str.substr(1, str.length - 2) : str;

 		let allTerms: any[] = [];
-		let allFuzzyTerms = [];

 		try {
 			allTerms = filterParser(query);
@ -471,81 +424,11 @@ export default class SearchEngine {
 			console.warn(error);
 		}

-		const textTerms = allTerms.filter(x => x.name === 'text' && !x.negated);
-		const titleTerms = allTerms.filter(x => x.name === 'title' && !x.negated);
-		const bodyTerms = allTerms.filter(x => x.name === 'body' && !x.negated);
+		const textTerms = allTerms.filter(x => x.name === 'text' && !x.negated).map(x => trimQuotes(x.value));
+		const titleTerms = allTerms.filter(x => x.name === 'title' && !x.negated).map(x => trimQuotes(x.value));
+		const bodyTerms = allTerms.filter(x => x.name === 'body' && !x.negated).map(x => trimQuotes(x.value));

-		const fuzzyScore = [];
-		let numFuzzyMatches = [];
-		let terms: any = null;
-
-		if (fuzzy) {
-			const fuzzyText = await this.fuzzifier(textTerms.filter(x => !(x.quoted || x.wildcard)).map(x => trimQuotes(x.value)));
-			const fuzzyTitle = await this.fuzzifier(titleTerms.filter(x => !x.wildcard).map(x => trimQuotes(x.value)));
-			const fuzzyBody = await this.fuzzifier(bodyTerms.filter(x => !x.wildcard).map(x => trimQuotes(x.value)));
-
-			// Floor the fuzzy scores to 0, 1 and 2.
-			const floorFuzzyScore = (matches: any) => {
-				for (let i = 0; i < matches.length; i++) matches[i].score = i;
-			};
-
-			fuzzyText.forEach(floorFuzzyScore);
-			fuzzyTitle.forEach(floorFuzzyScore);
-			fuzzyBody.forEach(floorFuzzyScore);
-
-			const phraseTextSearch = textTerms.filter(x => x.quoted);
-			const wildCardSearch = textTerms.concat(titleTerms).concat(bodyTerms).filter(x => x.wildcard);
-
-			// Save number of fuzzy matches we got for each word
-			// fuzzifier() is currently set to return at most 3 matches
-			// We need to know which fuzzy words go together so that we can filter out notes that don't contain a required word.
-			numFuzzyMatches = fuzzyText.concat(fuzzyTitle).concat(fuzzyBody).map(x => x.length);
-			for (let i = 0; i < phraseTextSearch.length + wildCardSearch.length; i++) {
-				// Phrase searches and wildcard searches are preserved without fuzzification (A single match)
-				numFuzzyMatches.push(1);
-			}
-
-			const mergedFuzzyText = [].concat.apply([], fuzzyText);
-			const mergedFuzzyTitle = [].concat.apply([], fuzzyTitle);
-			const mergedFuzzyBody = [].concat.apply([], fuzzyBody);
-
-			const fuzzyTextTerms = mergedFuzzyText.map(x => { return { name: 'text', value: x.word, negated: false, score: x.score }; });
-			const fuzzyTitleTerms = mergedFuzzyTitle.map(x => { return { name: 'title', value: x.word, negated: false, score: x.score }; });
-			const fuzzyBodyTerms = mergedFuzzyBody.map(x => { return { name: 'body', value: x.word, negated: false, score: x.score }; });
-
-			// Remove previous text, title and body and replace with fuzzy versions
-			allTerms = allTerms.filter(x => (x.name !== 'text' && x.name !== 'title' && x.name !== 'body'));
-
-			// The order matters here!
-			// The text goes first, then title, then body, then phrase and finally wildcard
-			// This is because it needs to match with numFuzzyMathches.
-			allFuzzyTerms = allTerms.concat(fuzzyTextTerms).concat(fuzzyTitleTerms).concat(fuzzyBodyTerms).concat(phraseTextSearch).concat(wildCardSearch);
-
-			const allTextTerms = allFuzzyTerms.filter(x => x.name === 'title' || x.name === 'body' || x.name === 'text');
-			for (let i = 0; i < allTextTerms.length; i++) {
-				// Phrase searches and wildcard searches will get a fuzziness score of zero.
-				// This means that they will go first in the sort order (Even if there are other words with matches in the title)
-				// Undesirable?
-				fuzzyScore.push(allFuzzyTerms[i].score ? allFuzzyTerms[i].score : 0);
-			}
-
-			const wildCardTextTerms = wildCardSearch.filter(x => x.name === 'text').map(x =>trimQuotes(x.value));
-			const wildCardTitleTerms = wildCardSearch.filter(x => x.name === 'title').map(x =>trimQuotes(x.value));
-			const wildCardBodyTerms = wildCardSearch.filter(x => x.name === 'body').map(x =>trimQuotes(x.value));
-			const phraseTextTerms = phraseTextSearch.map(x => trimQuotes(x.value));
-
-			terms = {
-				_: fuzzyTextTerms.map(x => trimQuotes(x.value)).concat(phraseTextTerms).concat(wildCardTextTerms),
-				title: fuzzyTitleTerms.map(x => trimQuotes(x.value)).concat(wildCardTitleTerms),
-				body: fuzzyBodyTerms.map(x => trimQuotes(x.value)).concat(wildCardBodyTerms),
-			};
-		} else {
-			const nonNegatedTextTerms = textTerms.length + titleTerms.length + bodyTerms.length;
-			for (let i = 0; i < nonNegatedTextTerms; i++) {
-				fuzzyScore.push(0);
-			}
-			terms = { _: textTerms.map(x =>trimQuotes(x.value)), 'title': titleTerms.map(x =>trimQuotes(x.value)), 'body': bodyTerms.map(x =>trimQuotes(x.value)) };
-		}
+		const terms: any = { _: textTerms, 'title': titleTerms, 'body': bodyTerms };

 		// Filter terms:
 		// - Convert wildcards to regex
@ -603,10 +486,7 @@ export default class SearchEngine {
 			termCount: termCount,
 			keys: keys,
 			terms: terms, // text terms
-			allTerms: fuzzy ? allFuzzyTerms : allTerms,
-			fuzzyScore: fuzzyScore,
-			numFuzzyMatches: numFuzzyMatches,
-			fuzzy: fuzzy,
+			allTerms: allTerms,
 			any: !!allTerms.find(term => term.name === 'any'),
 		};
 	}
@ -651,8 +531,8 @@ export default class SearchEngine {
 		return Note.previews(null, searchOptions);
 	}

-	determineSearchType_(query: string, options: any) {
-		if (options.searchType === SearchEngine.SEARCH_TYPE_BASIC) return SearchEngine.SEARCH_TYPE_BASIC;
+	determineSearchType_(query: string, preferredSearchType: any) {
+		if (preferredSearchType === SearchEngine.SEARCH_TYPE_BASIC) return SearchEngine.SEARCH_TYPE_BASIC;

 		// If preferredSearchType is "fts" we auto-detect anyway
 		// because it's not always supported.
@ -669,12 +549,9 @@ export default class SearchEngine {

 		if (!Setting.value('db.ftsEnabled') || ['ja', 'zh', 'ko', 'th'].indexOf(st) >= 0) {
 			return SearchEngine.SEARCH_TYPE_BASIC;
-		} else if (options.fuzzy) {
-			return SearchEngine.SEARCH_TYPE_FTS_FUZZY;
-		} else {
-			return SearchEngine.SEARCH_TYPE_FTS;
 		}

+		return SearchEngine.SEARCH_TYPE_FTS;
 	}

 	async search(searchString: string, options: any = null) {
@ -682,35 +559,30 @@ export default class SearchEngine {

 		options = Object.assign({}, {
 			searchType: SearchEngine.SEARCH_TYPE_AUTO,
-			fuzzy: Setting.value('db.fuzzySearchEnabled') === 1,
 		}, options);

-		const searchType = this.determineSearchType_(searchString, options);
+		const searchType = this.determineSearchType_(searchString, options.searchType);
+		const parsedQuery = await this.parseQuery(searchString);

 		if (searchType === SearchEngine.SEARCH_TYPE_BASIC) {
 			// Non-alphabetical languages aren't support by SQLite FTS (except with extensions which are not available in all platforms)
 			searchString = this.normalizeText_(searchString);
 			const rows = await this.basicSearch(searchString);
-			const parsedQuery = await this.parseQuery(searchString);
+
 			this.processResults_(rows, parsedQuery, true);
 			return rows;
 		} else {
-			// SEARCH_TYPE_FTS or SEARCH_TYPE_FTS_FUZZY
+			// SEARCH_TYPE_FTS
 			// FTS will ignore all special characters, like "-" in the index. So if
 			// we search for "this-phrase" it won't find it because it will only
 			// see "this phrase" in the index. Because of this, we remove the dashes
 			// when searching.
 			// https://github.com/laurent22/joplin/issues/1075#issuecomment-459258856

-			const parsedQuery = await this.parseQuery(searchString, searchType === SearchEngine.SEARCH_TYPE_FTS_FUZZY);
-
 			try {
-				const { query, params } = queryBuilder(parsedQuery.allTerms, searchType === SearchEngine.SEARCH_TYPE_FTS_FUZZY);
+				const { query, params } = queryBuilder(parsedQuery.allTerms);
 				const rows = await this.db().selectAll(query, params);
 				this.processResults_(rows, parsedQuery);
-				if (searchType === SearchEngine.SEARCH_TYPE_FTS_FUZZY && !parsedQuery.any) {
-					return rows.filter((row: any) => row.include);
-				}
 				return rows;
 			} catch (error) {
 				this.logger().warn(`Cannot execute MATCH query: ${searchString}: ${error.message}`);
--- a/packages/lib/services/searchengine/queryBuilder.ts
+++ b/packages/lib/services/searchengine/queryBuilder.ts
@ -300,7 +300,7 @@ const sourceUrlFilter = (terms: Term[], conditons: string[], params: string[], r
 };


-const textFilter = (terms: Term[], conditions: string[], params: string[], relation: Relation, fuzzy: Boolean) => {
+const textFilter = (terms: Term[], conditions: string[], params: string[], relation: Relation) => {
 	const addExcludeTextConditions = (excludedTerms: Term[], conditions: string[], params: string[], relation: Relation) => {
 		const type = excludedTerms[0].name === 'text' ? '' : `.${excludedTerms[0].name}`;

@ -342,7 +342,7 @@ const textFilter = (terms: Term[], conditions: string[], params: string[], relat
 			if (term.name === 'text') return term.value;
 			else return `${term.name}:${term.value}`;
 		});
-		const matchQuery = (fuzzy || (relation === 'OR')) ? termsToMatch.join(' OR ') : termsToMatch.join(' ');
+		const matchQuery = (relation === 'OR') ? termsToMatch.join(' OR ') : termsToMatch.join(' ');
 		params.push(matchQuery);
 	}

@ -374,7 +374,7 @@ const getConnective = (terms: Term[], relation: Relation): string => {
 	return (!notebookTerm && (relation === 'OR')) ? 'ROWID=-1' : '1'; // ROWID=-1 acts as 0 (something always false)
 };

-export default function queryBuilder(terms: Term[], fuzzy: boolean) {
+export default function queryBuilder(terms: Term[]) {
 	const queryParts: string[] = [];
 	const params: string[] = [];
 	const withs: string[] = [];
@ -402,7 +402,7 @@ export default function queryBuilder(terms: Term[], fuzzy: boolean) {

 	resourceFilter(terms, queryParts, params, relation, withs);

-	textFilter(terms, queryParts, params, relation, fuzzy);
+	textFilter(terms, queryParts, params, relation);


 	typeFilter(terms, queryParts, params, relation);