Updated to Tesseract.js v.3; added exif-based rotation (#638)

* Updated to Tesseract.js v.3

* Updated exif-based rotation

* Fixes relating to linter

* Removed node 12 from workflows
master
Balearica 2 years ago committed by GitHub
parent a9ac00ccac
commit 13b95f6371
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
  1. 4
      .eslintrc
  2. 2
      .github/workflows/node.js.yml
  3. 18164
      package-lock.json
  4. 11
      package.json
  5. 2
      src/worker-script/index.js
  6. 6
      src/worker-script/utils/setImage.js
  7. 5
      src/worker/node/loadImage.js
  8. 2
      tests/constants.js

@ -1,5 +1,6 @@
{
"extends": "airbnb-base",
"parser": "babel-eslint",
"env": {
"browser": true,
"node": true,
@ -10,6 +11,7 @@
"no-underscore-dangle": 0,
"no-console": 0,
"global-require": 0,
"camelcase": 0
"camelcase": 0,
"no-control-regex": 0
}
}

@ -16,7 +16,7 @@ jobs:
strategy:
matrix:
node-version: [12.x, 14.x, 16.x]
node-version: [14.x, 16.x]
steps:
- uses: actions/checkout@v2

18164
package-lock.json generated

File diff suppressed because it is too large Load Diff

@ -35,10 +35,10 @@
],
"license": "Apache-2.0",
"devDependencies": {
"@babel/core": "^7.7.7",
"@babel/preset-env": "^7.7.7",
"@babel/core": "^7.18.7",
"@babel/preset-env": "^7.18.7",
"acorn": "^6.4.0",
"babel-loader": "^8.1.0",
"babel-loader": "^8.2.0",
"cors": "^2.8.5",
"eslint": "^7.2.0",
"eslint-config-airbnb-base": "^14.2.0",
@ -57,18 +57,17 @@
"webpack-dev-middleware": "^3.7.2"
},
"dependencies": {
"blueimp-load-image": "^3.0.0",
"babel-eslint": "^10.1.0",
"bmp-js": "^0.1.0",
"file-type": "^12.4.1",
"idb-keyval": "^3.2.0",
"is-electron": "^2.2.0",
"is-url": "^1.2.4",
"jpeg-autorotate": "^7.1.1",
"node-fetch": "^2.6.0",
"opencollective-postinstall": "^2.0.2",
"regenerator-runtime": "^0.13.3",
"resolve-url": "^0.2.1",
"tesseract.js-core": "^2.2.0",
"tesseract.js-core": "^3.0.1",
"zlibjs": "^0.3.1"
},
"repository": {

@ -72,7 +72,7 @@ const loadLanguage = async ({
},
},
},
res) => {
res) => {
const loadAndGunzipFile = async (_lang) => {
const lang = typeof _lang === 'string' ? _lang : _lang.code;
const readCache = ['refresh', 'none'].includes(cacheMethod)

@ -17,6 +17,8 @@ module.exports = (TessModule, api, image) => {
let w = 0;
let h = 0;
const exif = buf.slice(0, 500).toString().match(/\x01\x12\x00\x03\x00\x00\x00\x01\x00(.)/)?.[1]?.charCodeAt(0) || 1;
/*
* Although leptonica should support reading bmp, there is a bug of "compressed BMP files".
* As there is no solution, we need to use bmp-js for now.
@ -53,9 +55,9 @@ module.exports = (TessModule, api, image) => {
*
*/
if (data === null) {
api.SetImage(pix);
api.SetImage(pix, undefined, undefined, undefined, undefined, exif);
} else {
api.SetImage(data, w, h, bytesPerPixel, w * bytesPerPixel);
api.SetImage(data, w, h, bytesPerPixel, w * bytesPerPixel, exif);
}
return data === null ? pix : data;
};

@ -2,7 +2,6 @@ const util = require('util');
const fs = require('fs');
const fetch = require('node-fetch');
const isURL = require('is-url');
const jo = require('jpeg-autorotate');
const readFile = util.promisify(fs.readFile);
@ -32,9 +31,5 @@ module.exports = async (image) => {
data = image;
}
try {
data = (await jo.rotate(data, { quality: 100 })).buffer;
} catch (_) {} /* eslint-disable-line */
return new Uint8Array(data);
};

@ -13,7 +13,7 @@ const SIMPLE_TEXT_HALF = 'Tesse\n';
const COMSIC_TEXT = 'HellO World\nfrom beyond\nthe Cosmic Void\n';
const TESTOCR_TEXT = 'This is a lot of 12 point text to test the\nocr code and see if it works on all types\nof file format.\n\nThe quick brown dog jumped over the\nlazy fox. The quick brown dog jumped\nover the lazy fox. The quick brown dog\njumped over the lazy fox. The quick\nbrown dog jumped over the lazy fox.\n';
const CHINESE_TEXT = '繁 體 中 文 測 試\n';
const BILL_SPACED_TEXT = 'FIRST CHEQUING\n\nLine of Credit 100,000.00 Rate 4.2000\n\nDate Description Number Debits Credits Balance\n31Jul2018 Balance Forward 99,878.08 -\n01Aug2018 Clearing Cheque 4987 36.07 99,914.15 -\n01Aug2018 Clearing Cheque 4986 60.93 99,975.08 -\n01Aug2018 Clearing Cheque 4982 800.04 100,775.12 EX\n01Aug2018 Clearing Cheque 4981 82334 101,598.46 EX\n01Aug2018 Incoming Interac e-Transfer 1454 101,583.92 EX\n01Aug2018 Incoming Interac e-Transfer 400.00 101,183.92 EX\n01Aug2018 Assisted Deposit 3241450 68,769.42 -\n01Aug2018 Transfer out to loan 7 1,500.00 70,269.42 -\n02Aug2018 Clearing Cheque 4984 48.08 70,317.50 -\n02Aug2018 Clearing Cheque 4985 7051 70,388.01 -\n02Aug2018 Clearing Cheque 4992 500.00 70.888.01 -\n';
const BILL_SPACED_TEXT = 'FIRST CHEQUING\n\nLine of Credit 100,000.00 Rate 4.2000\n\nDate Description Number Debits Credits Balance\n31Jul2018 Balance Forward 99,878.08 -\n01Aug2018 Clearing Cheque 4987 36.07 99,914.15 -\n01Aug2018 Clearing Cheque 4986 60.93 99,975.08 -\n01Aug2018 Clearing Cheque 4982 800.04 100,775.12 EX\n01Aug2018 Clearing Cheque 4981 823.34 101,598.46 EX\n01Aug2018 Incoming Interac e-Transfer 1454 101,583.92 EX\n01Aug2018 Incoming Interac e-Transfer 400.00 101,183.92 EX\n01Aug2018 Assisted Deposit 3241450 68,769.42 -\n01Aug2018 Transfer out to loan 7 1,500.00 70,269.42 -\n02Aug2018 Clearing Cheque 4984 48.08 70,317.50 -\n02Aug2018 Clearing Cheque 4985 7051 70,388.01 -\n02Aug2018 Clearing Cheque 4992 500.00 70.888.01 -\n';
const SIMPLE_WHITELIST_TEXT = 'Tesses\n';
const FORMATS = ['png', 'jpg', 'bmp', 'pbm'];
const SIMPLE_PNG_BASE64 = 'data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAUAAAAC0CAIAAABqhmJGAAAAAXNSR0IArs4c6QAAAARnQU1BAACxjwv8YQUAAAAJcEhZcwAADsMAAA7DAcdvqGQAAASuSURBVHhe7dftVdswAIbRzsVAzMM0XabDUCOUxLYsWW4Jp+/pvf9w9GH76CHw4x2IJWAIJmAIJmAIJmAIJmAIJmAIJmAIJmAIJmAIJmAIJmAIJmAIJmAIJmAIJmAIJmAIJmAIJmAIJmAIJmAIJmAIJmAIJmAIJmAIJmAIJmAIJmAIJmAIJmAIJmAIJmAIJmAIJmAIJmAIJmAIJmAIJmAIJmAIJmAIJmAIJmAIJmAIJmAIJmAIJmAIJmAIJmAIJmAIJmAIJmAIJmAIJmAIJmAIJmAIJmAIJmAIJmAIJmAIJmAIJmAIJmAIJmAIJmAIJmAIJmAIJmAIJmAIJmAIJmAIJmAIJmAIJmAIJmAIJmAIJmAI9p8G/PbyY8rL2686g8t+vnqHTyfgIYfvz/26veTXn/UKX8+f0EU9bHrtu/6KfAN/AwEXAj7lFf2TBFw4nae8on+SgIvJ01n/KLzpDK+L3bT/Ap4O+HC+V12mTH+M3gzcLbIY/EO6HfxYp13k09nb6r3UqcdnjoCL3ll72J26h+35Oxy2XvZ0wOLaXq9v2+F1UC+7RZtMZ/DnfX1lwDOPzwUCLo7O2trtDK8H3M/iqoc6bj1subT68XTA/F7bGJooyzKbhTvLPHY8eJLHlbNX1DqYUVfdXbqwJjsCLsans37aNNJM6w68OR0wv9f9ymKw3k67yn2ZZpHlg3a3zis60s6oV+ZvlzMCLoanc3Dsdt9TdWT/lM8OmNjr5KY72jmzq1zfrbvXtVtmRMDF8HTWcgaaqIrD1U4G/MFewxrW262s5jS/Fzpmdts6mnHy+Fwl4GJ0OjsNrG1P/y7CNo3+gEt7jW56MVprNed7A/5w+n6YJ+BieDpnj/jO6pweTz0acGWvmZveL9XOmd3x6wKuTt8PEwRczLRw4eje1XX7c/cDruw1uuneOu2c4aOvzI57mJhRh1xZlQ0BF+Oz9vcF96fuB1zYa7R2b5mD6/XSwdfg8snj4q21+W/L02dfzIxhQMDFyTm6Hd7m+JYP7rPKT5sRuzhOBywm91rUkYc3fV9ltchtr8VmzuGOdfDB9N1tFYefNfdXLmyGjNZkhoCLUQufVqd/7z7rUcLW/XieDvg0s9difNOdRV5ePibt5vTuazusWbF9rs2E5v4mH58LBFyMW7g5OID7s9cMuTygmt9rcNPb5MrAz0lHc3Z9Ht7XZsxqxO36ZtLR/c0+PpMEzLOc/4LhrwmYZ6lfywJ+JgHzJPr9DgLmi23/zdXvcwmYL7YKWL1PJ2AIJmAIJmAIJmAIJmAIJmAIJmAIJmAIJmAIJmAIJmAIJmAIJmAIJmAIJmAIJmAIJmAIJmAIJmAIJmAIJmAIJmAIJmAIJmAIJmAIJmAIJmAIJmAIJmAIJmAIJmAIJmAIJmAIJmAIJmAIJmAIJmAIJmAIJmAIJmAIJmAIJmAIJmAIJmAIJmAIJmAIJmAIJmAIJmAIJmAIJmAIJmAIJmAIJmAIJmAIJmAIJmAIJmAIJmAIJmAIJmAIJmAIJmAIJmAIJmAIJmAIJmAIJmAIJmAIJmAIJmAIJmAIJmAIJmAIJmAIJmAIJmAIJmAIJmAIJmAIJmCI9f7+G6yFxVg/GyYwAAAAAElFTkSuQmCC';

Loading…
Cancel
Save