From 1fcfa9c591940a3763b502d3f70b328104208977 Mon Sep 17 00:00:00 2001 From: "renovate[bot]" <29139614+renovate[bot]@users.noreply.github.com> Date: Fri, 22 Dec 2023 21:20:29 +0000 Subject: [PATCH] Update dependency tesseract.js to v5 (#9575) Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com> Co-authored-by: Laurent Cozic --- packages/app-desktop/package.json | 2 +- packages/lib/package.json | 2 +- .../ocr/drivers/OcrDriverTesseract.ts | 7 ++----- yarn.lock | 21 ++++++++++++------- 4 files changed, 18 insertions(+), 14 deletions(-) diff --git a/packages/app-desktop/package.json b/packages/app-desktop/package.json index f1e9872d2f..0cc73ef3ac 100644 --- a/packages/app-desktop/package.json +++ b/packages/app-desktop/package.json @@ -189,7 +189,7 @@ "styled-components": "5.3.11", "styled-system": "5.1.5", "taboverride": "4.0.3", - "tesseract.js": "4.1.4", + "tesseract.js": "5.0.0", "tinymce": "5.10.6" } } diff --git a/packages/lib/package.json b/packages/lib/package.json index 4ba100ce08..ea8cc43821 100644 --- a/packages/lib/package.json +++ b/packages/lib/package.json @@ -28,7 +28,7 @@ "jest": "29.7.0", "pdfjs-dist": "3.11.174", "sharp": "0.33.0", - "tesseract.js": "4.1.4", + "tesseract.js": "5.0.0", "typescript": "5.2.2" }, "dependencies": { diff --git a/packages/lib/services/ocr/drivers/OcrDriverTesseract.ts b/packages/lib/services/ocr/drivers/OcrDriverTesseract.ts index 7001ff7fff..7f5632ae14 100644 --- a/packages/lib/services/ocr/drivers/OcrDriverTesseract.ts +++ b/packages/lib/services/ocr/drivers/OcrDriverTesseract.ts @@ -1,5 +1,5 @@ import { RecognizeResult, RecognizeResultBoundingBox, RecognizeResultLine, RecognizeResultWord } from '../utils/types'; -import { Worker, WorkerOptions, createWorker, RecognizeResult as TesseractRecognizeResult } from 'tesseract.js'; +import { Worker, WorkerOptions, createWorker, RecognizeResult as TesseractRecognizeResult, OEM } from 'tesseract.js'; import OcrDriverBase from '../OcrDriverBase'; import { Minute } from '@joplin/utils/time'; import shim from '../../../shim'; @@ -59,10 +59,7 @@ export default class OcrDriverTesseract extends OcrDriverBase { if (this.workerPath_) createWorkerOptions.workerPath = this.workerPath_; if (this.corePath_) createWorkerOptions.corePath = this.corePath_; - const worker = await this.tesseract_.createWorker(createWorkerOptions); - - await worker.loadLanguage(language); - await worker.initialize(language); + const worker = await this.tesseract_.createWorker(language, OEM.LSTM_ONLY, createWorkerOptions); const output: WorkerWrapper = { id: workerId_++, diff --git a/yarn.lock b/yarn.lock index 9ba4927a0c..eff4f43439 100644 --- a/yarn.lock +++ b/yarn.lock @@ -6585,7 +6585,7 @@ __metadata: styled-components: 5.3.11 styled-system: 5.1.5 taboverride: 4.0.3 - tesseract.js: 4.1.4 + tesseract.js: 5.0.0 tinymce: 5.10.6 ts-node: 10.9.1 typescript: 5.2.2 @@ -6891,7 +6891,7 @@ __metadata: string-to-stream: 3.0.1 tar: 6.2.0 tcp-port-used: 1.0.2 - tesseract.js: 4.1.4 + tesseract.js: 5.0.0 typescript: 5.2.2 uglifycss: 0.0.29 url-parse: 1.5.10 @@ -40249,6 +40249,13 @@ __metadata: languageName: node linkType: hard +"tesseract.js-core@npm:^5.0.0": + version: 5.0.0 + resolution: "tesseract.js-core@npm:5.0.0" + checksum: 5d66291078ecf36f2e957e195bc82c4b313136bebf64cfc974e188226a524690785c6f3561ce4cc42f6060190318d3a08c6fee88dade19d77e70f50badd59d7e + languageName: node + linkType: hard + "tesseract.js@npm:*": version: 4.1.2 resolution: "tesseract.js@npm:4.1.2" @@ -40267,9 +40274,9 @@ __metadata: languageName: node linkType: hard -"tesseract.js@npm:4.1.4": - version: 4.1.4 - resolution: "tesseract.js@npm:4.1.4" +"tesseract.js@npm:5.0.0": + version: 5.0.0 + resolution: "tesseract.js@npm:5.0.0" dependencies: bmp-js: ^0.1.0 idb-keyval: ^6.2.0 @@ -40278,10 +40285,10 @@ __metadata: node-fetch: ^2.6.9 opencollective-postinstall: ^2.0.3 regenerator-runtime: ^0.13.3 - tesseract.js-core: ^4.0.4 + tesseract.js-core: ^5.0.0 wasm-feature-detect: ^1.2.11 zlibjs: ^0.3.1 - checksum: 6a448f01d8e6f63fe1f99775dbf95214af3962ea1f697144e3887559c6969c89645bb01c6391e889d848ff949d431b67bdd6b94f77b9193dd48e3914415a5b0e + checksum: 0d4e322802322c4ed3639c1f49abdb860d6c4e821f3a8fe3398de153e331702d8360c9e5a4f00c2f6e190096043bfc5b098027effd6c1b23d021b0bed559802a languageName: node linkType: hard