From b7dd138ea4ae4e93ca8bc734ce6b9daf5a2faae2 Mon Sep 17 00:00:00 2001 From: Kevin Kwok Date: Fri, 7 Oct 2016 20:44:21 -0400 Subject: [PATCH 1/8] Update README.md --- README.md | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/README.md b/README.md index cba796d..a6262f1 100644 --- a/README.md +++ b/README.md @@ -1,5 +1,4 @@ > # UNDER CONTRUCTION -> ## Due for Release on ~~Tuesday, Oct 4, 2016~~ Friday, Oct 7, 2016 > Sorry for the delay! # [Tesseract.js](http://tesseract.projectnaptha.com/) @@ -281,4 +280,4 @@ After you've cloned the repo and run `npm install` as described in the [Developm ``` ### Send us a Pull Request! -Thanks :) \ No newline at end of file +Thanks :) From 8817639572218f39c39bc16ced1bd9bacd8fb5d1 Mon Sep 17 00:00:00 2001 From: Kevin Kwok Date: Fri, 7 Oct 2016 23:00:46 -0400 Subject: [PATCH 2/8] derp --- dist/tesseract.js | 10 +++++++++- dist/worker.js | 37 +++++++++++++++++++++---------------- src/browser/index.js | 3 ++- 3 files changed, 32 insertions(+), 18 deletions(-) diff --git a/dist/tesseract.js b/dist/tesseract.js index 4bc3575..117dbd4 100644 --- a/dist/tesseract.js +++ b/dist/tesseract.js @@ -3,7 +3,8 @@ exports.defaultOptions = { langPath: 'https://cdn.rawgit.com/naptha/tessdata/gh-pages/3.02/', - workerPath: 'dist/worker.js', + // workerPath: 'dist/worker.js', + workerPath: 'https://cdn.rawgit.com/naptha/tesseract.js/0.1.0/dist/worker.js', tesseractPath: 'https://cdn.rawgit.com/naptha/tesseract.js-core/0.1.0/index.js' }; @@ -73,6 +74,13 @@ function loadImage(image, cb) { },{}],2:[function(require,module,exports){ "use strict"; +// The result of dump.js is a big JSON tree +// which can be easily serialized (for instance +// to be sent from a webworker to the main app +// or through Node's IPC), but we want +// a (circular) DOM-like interface for walking +// through the data. + module.exports = function circularize(page) { page.paragraphs = []; page.lines = []; diff --git a/dist/worker.js b/dist/worker.js index d68d7d1..dd3f60b 100644 --- a/dist/worker.js +++ b/dist/worker.js @@ -11937,8 +11937,6 @@ global.addEventListener('message', function (e) { }); }); -exports.getLanguageData = require('./lang.js'); - exports.getCore = function (req, res) { if (!global.TesseractCore) { res.progress({ status: 'loading tesseract core' }); @@ -11948,12 +11946,16 @@ exports.getCore = function (req, res) { return TesseractCore; }; +exports.getLanguageData = require('./lang.js'); + workerUtils.setAdapter(module.exports); }).call(this,typeof global !== "undefined" ? global : typeof self !== "undefined" ? self : typeof window !== "undefined" ? window : {}) },{"../common/worker.js":47,"./lang.js":42}],44:[function(require,module,exports){ 'use strict'; +// This converts an image to grayscale + module.exports = function desaturate(image) { var width, height; if (image.data) { @@ -11975,25 +11977,13 @@ module.exports = function desaturate(image) { dst[j] = (src[i] * 77 + src[i + 1] * 151 + src[i + 2] * 28) * src[i + 3] + (255 - src[i + 3] << 15) + 32768 >> 16; }image = dst; } else { - throw 'Expected ImageData'; + throw 'Invalid ImageData'; } return image; }; },{}],45:[function(require,module,exports){ -"use strict"; - -function deindent(html) { - var lines = html.split('\n'); - if (lines[0].substring(0, 2) === " ") { - for (var i = 0; i < lines.length; i++) { - if (lines[i].substring(0, 2) === " ") { - lines[i] = lines[i].slice(2); - } - }; - } - return lines.join('\n'); -} +'use strict'; module.exports = function DumpLiterallyEverything(Module, base) { var ri = base.GetIterator(); @@ -12148,6 +12138,21 @@ module.exports = function DumpLiterallyEverything(Module, base) { }; }; +// the generated HOCR is excessively indented, so +// we get rid of that indentation + +function deindent(html) { + var lines = html.split('\n'); + if (lines[0].substring(0, 2) === " ") { + for (var i = 0; i < lines.length; i++) { + if (lines[i].substring(0, 2) === " ") { + lines[i] = lines[i].slice(2); + } + }; + } + return lines.join('\n'); +} + },{}],46:[function(require,module,exports){ module.exports={"afr": 1079573, "ara": 1701536, "aze": 1420865, "bel": 1276820, "ben": 6772012, "bul": 1605615, "cat": 1652368, "ces": 1035441, "chi_sim": 17710414, "chi_tra": 24717749, "chr": 320649, "dan-frak": 677656, "dan": 1972936, "deu-frak": 822644, "deu": 991656, "ell": 859719, "eng": 9453554, "enm": 619254, "epo": 1241212, "equ": 821130, "est": 1905040, "eus": 1641190, "fin": 979418, "fra": 1376221, "frk": 5912963, "frm": 5147082, "glg": 1674938, "grc": 3012615, "heb": 1051501, "hin": 6590065, "hrv": 1926995, "hun": 3074473, "ind": 1874776, "isl": 1634041, "ita": 948593, "ita_old": 3436571, "jpn": 13507168, "kan": 4390317, "kor": 5353098, "lav": 1843944, "lit": 1779240, "mal": 5966263, "meme": 88453, "mkd": 1163087, "mlt": 1463001, "msa": 1665427, "nld": 1134708, "nor": 2191610, "osd": 4274649, "pol": 7024662, "por": 909359, "ron": 915680, "rus": 5969957, "slk-frak": 289885, "slk": 2217342, "slv": 1611338, "spa": 883170, "spa_old": 5647453, "sqi": 1667041, "srp": 1770244, "swa": 757916, "swe": 2451917, "tam": 3498763, "tel": 5795246, "tgl": 1496256, "tha": 3811136, "tur": 3563264, "ukr": 937566, "vie": 2195922} },{}],47:[function(require,module,exports){ diff --git a/src/browser/index.js b/src/browser/index.js index 7ac97fa..1518353 100644 --- a/src/browser/index.js +++ b/src/browser/index.js @@ -1,6 +1,7 @@ exports.defaultOptions = { langPath: 'https://cdn.rawgit.com/naptha/tessdata/gh-pages/3.02/', - workerPath: 'dist/worker.js', + // workerPath: 'dist/worker.js', + workerPath: 'https://cdn.rawgit.com/naptha/tesseract.js/0.1.0/dist/worker.js', tesseractPath: 'https://cdn.rawgit.com/naptha/tesseract.js-core/0.1.0/index.js', } From 16d9e9d0881dbc44bbe8266e8305cf936dfe9142 Mon Sep 17 00:00:00 2001 From: Kevin Kwok Date: Fri, 7 Oct 2016 23:05:28 -0400 Subject: [PATCH 3/8] wumbo --- src/browser/index.js | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/src/browser/index.js b/src/browser/index.js index 1518353..032b307 100644 --- a/src/browser/index.js +++ b/src/browser/index.js @@ -6,7 +6,13 @@ exports.defaultOptions = { } exports.spawnWorker = function spawnWorker(instance, workerOptions){ - var worker = new Worker(workerOptions.workerPath) + if(window.Blob && window.URL){ + var blob = new Blob(['importScripts("' + workerOptions.workerPath + '");']) + var worker = new Worker(window.URL.createObjectURL(blob)); + }else{ + var worker = new Worker(workerOptions.workerPath) + } + worker.onmessage = function(e){ var packet = e.data; instance._recv(packet) From c2a06b4b12767a0e47994e9ca0c1a540df95aaa4 Mon Sep 17 00:00:00 2001 From: Kevin Kwok Date: Fri, 7 Oct 2016 23:07:50 -0400 Subject: [PATCH 4/8] import with cors --- dist/tesseract.js | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/dist/tesseract.js b/dist/tesseract.js index 117dbd4..1db4202 100644 --- a/dist/tesseract.js +++ b/dist/tesseract.js @@ -9,7 +9,13 @@ exports.defaultOptions = { }; exports.spawnWorker = function spawnWorker(instance, workerOptions) { - var worker = new Worker(workerOptions.workerPath); + if (window.Blob && window.URL) { + var blob = new Blob(['importScripts("' + workerOptions.workerPath + '");']); + var worker = new Worker(window.URL.createObjectURL(blob)); + } else { + var worker = new Worker(workerOptions.workerPath); + } + worker.onmessage = function (e) { var packet = e.data; instance._recv(packet); From cb6419203f92c148b9646acc3461efa9e7e6eebc Mon Sep 17 00:00:00 2001 From: Kevin Kwok Date: Fri, 7 Oct 2016 23:09:47 -0400 Subject: [PATCH 5/8] Fixing opts --- dist/worker.js | 1 + src/common/worker.js | 1 + 2 files changed, 2 insertions(+) diff --git a/dist/worker.js b/dist/worker.js index dd3f60b..4363c3e 100644 --- a/dist/worker.js +++ b/dist/worker.js @@ -12245,6 +12245,7 @@ function handleRecognize(req, res) { base.Init(null, lang); res.progress({ status: 'initialized with language' }); + var options = req.options; for (var option in options) { if (options.hasOwnProperty(option)) { base.SetVariable(option, options[option]); diff --git a/src/common/worker.js b/src/common/worker.js index ab9b1d7..b6e4a4f 100644 --- a/src/common/worker.js +++ b/src/common/worker.js @@ -91,6 +91,7 @@ function handleRecognize(req, res){ base.Init(null, lang) res.progress({ status: 'initialized with language' }) + var options = req.options; for (var option in options) { if (options.hasOwnProperty(option)) { base.SetVariable(option, options[option]); From 1331ef8f18a3217795811b13ed5782df0bb34d33 Mon Sep 17 00:00:00 2001 From: Kevin Kwok Date: Fri, 7 Oct 2016 23:12:03 -0400 Subject: [PATCH 6/8] this is only mildly annoying --- dist/tesseract.js | 2 +- src/browser/index.js | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/dist/tesseract.js b/dist/tesseract.js index 1db4202..6c458ed 100644 --- a/dist/tesseract.js +++ b/dist/tesseract.js @@ -4,7 +4,7 @@ exports.defaultOptions = { langPath: 'https://cdn.rawgit.com/naptha/tessdata/gh-pages/3.02/', // workerPath: 'dist/worker.js', - workerPath: 'https://cdn.rawgit.com/naptha/tesseract.js/0.1.0/dist/worker.js', + workerPath: 'https://cdn.rawgit.com/naptha/tesseract.js/0.1.3/dist/worker.js', tesseractPath: 'https://cdn.rawgit.com/naptha/tesseract.js-core/0.1.0/index.js' }; diff --git a/src/browser/index.js b/src/browser/index.js index 032b307..d49d25f 100644 --- a/src/browser/index.js +++ b/src/browser/index.js @@ -1,7 +1,7 @@ exports.defaultOptions = { langPath: 'https://cdn.rawgit.com/naptha/tessdata/gh-pages/3.02/', // workerPath: 'dist/worker.js', - workerPath: 'https://cdn.rawgit.com/naptha/tesseract.js/0.1.0/dist/worker.js', + workerPath: 'https://cdn.rawgit.com/naptha/tesseract.js/0.1.3/dist/worker.js', tesseractPath: 'https://cdn.rawgit.com/naptha/tesseract.js-core/0.1.0/index.js', } From 090d0c786e47a28c403cfee105439bad87724974 Mon Sep 17 00:00:00 2001 From: Kevin Kwok Date: Fri, 7 Oct 2016 23:25:06 -0400 Subject: [PATCH 7/8] undocument languiches --- src/index.js | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/src/index.js b/src/index.js index 4ad3357..9bf2830 100644 --- a/src/index.js +++ b/src/index.js @@ -18,8 +18,13 @@ class TesseractWorker { recognize(image, options){ return this._delay(job => { - options = options || {} - options.lang = options.lang || 'eng'; + if(typeof options === 'string'){ + options = { lang: options }; + }else{ + options = options || {} + options.lang = options.lang || 'eng'; + } + job._send('recognize', { image: image, options: options, workerOptions: this.workerOptions }) }) } From dbf43bbe601a9ab559230cda3c0f0bdb8af433aa Mon Sep 17 00:00:00 2001 From: Kevin Kwok Date: Fri, 7 Oct 2016 23:25:17 -0400 Subject: [PATCH 8/8] update dist --- dist/tesseract.js | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/dist/tesseract.js b/dist/tesseract.js index 6c458ed..942fbcc 100644 --- a/dist/tesseract.js +++ b/dist/tesseract.js @@ -174,8 +174,13 @@ var TesseractWorker = function () { var _this = this; return this._delay(function (job) { - options = options || {}; - options.lang = options.lang || 'eng'; + if (typeof options === 'string') { + options = { lang: options }; + } else { + options = options || {}; + options.lang = options.lang || 'eng'; + } + job._send('recognize', { image: image, options: options, workerOptions: _this.workerOptions }); }); }