diff --git a/package.json b/package.json index 251bce8..460381f 100644 --- a/package.json +++ b/package.json @@ -7,7 +7,7 @@ "start": "node scripts/server.js", "build": "webpack --config scripts/webpack.config.prod.js", "prepublishOnly": "npm run build", - "test:node": "nyc mocha --exit --require ./scripts/test-helper.js ./tests/*.test.js", + "test:node": "nyc mocha --exit --bail --require ./scripts/test-helper.js ./tests/*.test.js", "lint": "eslint src" }, "browser": { diff --git a/src/common/worker.js b/src/common/worker.js index 50faf0d..30b446f 100644 --- a/src/common/worker.js +++ b/src/common/worker.js @@ -21,19 +21,12 @@ const setImage = (image) => { }; const handleInit = (req, res) => { - let MIN_MEMORY = 100663296; - - if (['chi_sim', 'chi_tra', 'jpn'].includes(req.lang)) { - MIN_MEMORY = 167772160; - } - - if (!Module || Module.TOTAL_MEMORY < MIN_MEMORY) { + if (!Module) { const Core = adapter.getCore(req, res); res.progress({ status: 'initializing tesseract', progress: 0 }); return Core({ - // TOTAL_MEMORY: MIN_MEMORY, TesseractProgress(percent) { latestJob.progress({ status: 'recognizing text', progress: Math.max(0, (percent - 30) / 70) }); }, diff --git a/tests/assets/images/chinese.png b/tests/assets/images/chinese.png new file mode 100644 index 0000000..643ee2f Binary files /dev/null and b/tests/assets/images/chinese.png differ diff --git a/tests/assets/traineddata/chi_tra.traineddata b/tests/assets/traineddata/chi_tra.traineddata new file mode 100644 index 0000000..5f1fe27 Binary files /dev/null and b/tests/assets/traineddata/chi_tra.traineddata differ diff --git a/tests/assets/traineddata/chi_tra.traineddata.gz b/tests/assets/traineddata/chi_tra.traineddata.gz new file mode 100644 index 0000000..591ece7 Binary files /dev/null and b/tests/assets/traineddata/chi_tra.traineddata.gz differ diff --git a/tests/recognize.test.js b/tests/recognize.test.js index 1c63a2f..06d2948 100644 --- a/tests/recognize.test.js +++ b/tests/recognize.test.js @@ -4,6 +4,7 @@ const IMAGE_PATH = 'http://localhost:3000/tests/assets/images'; const SIMPLE_TEXT = 'Tesseract.js\n'; const COMSIC_TEXT = 'HellO World\nfrom beyond\nthe Cosmic Void\n'; const TESTOCR_TEXT = 'This is a lot of 12 point text to test the\nocr code and see if it works on all types\nof file format.\n\nThe quick brown dog jumped over the\nlazy fox. The quick brown dog jumped\nover the lazy fox. The quick brown dog\njumped over the lazy fox. The quick\nbrown dog jumped over the lazy fox.\n'; +const CHINESE_TEXT = '繁 體 中 文 測 試\n'; const loadLangOptions = { langPath: 'http://localhost:3000/tests/assets/traineddata', @@ -19,19 +20,22 @@ const getWorker = options => ( ); before(function cb(done) { - this.timeout(10000); - loadLang({ - langs: 'eng', - cacheMethod: 'write', - langURI: loadLangOptions.langPath, - ...loadLangOptions, - }).then(() => { - if (typeof startServer !== 'undefined') { - startServer(done); - } else { + this.timeout(30000); + const load = () => ( + loadLang({ + langs: 'eng+chi_tra', + cacheMethod: 'write', + langURI: loadLangOptions.langPath, + ...loadLangOptions, + }).then(() => { done(); - } - }); + }) + ); + if (typeof startServer !== 'undefined') { + startServer(load); + } else { + load(); + } }); after((done) => { @@ -43,14 +47,31 @@ after((done) => { }); describe('recognize()', () => { - describe('supports multiple formats', () => { + describe('should recognize different langs', () => { + [ + { name: 'chinese.png', lang: 'chi_tra', ans: CHINESE_TEXT }, + ].forEach(({ name, lang, ans }) => ( + it(`recongize ${lang}`, (done) => { + const worker = getWorker(); + worker + .recognize(`${IMAGE_PATH}/${name}`, lang) + .then(({ text }) => { + expect(text).to.be(ans); + worker.terminate(); + done(); + }); + }).timeout(30000) + )); + }); + + describe('should read bmp, jpg, png and pbm format images', () => { ['bmp', 'jpg', 'png', 'pbm'].forEach(format => ( it(`support ${format} format`, (done) => { const worker = getWorker(); worker .recognize(`${IMAGE_PATH}/simple.${format}`) - .then((result) => { - expect(result.text).to.be(SIMPLE_TEXT); + .then(({ text }) => { + expect(text).to.be(SIMPLE_TEXT); worker.terminate(); done(); }); @@ -58,7 +79,7 @@ describe('recognize()', () => { )); }); - describe('1 worker multiple recognition', () => { + describe('should be able to recognize multiple images with 1 worker', () => { [3, 10, 20].forEach(num => ( it(`recognize ${num} images with 1 worker`, (done) => { const worker = getWorker(); @@ -75,7 +96,7 @@ describe('recognize()', () => { )); }); - describe('should recognize in order', () => { + describe('should recognize multiple images in order', () => { [1, 2].forEach((num) => { it(`recognize ${num * 2} images with 1 worker in order`, (done) => { const worker = getWorker();