Update tests

master
Jerome Wu 5 years ago
parent a31424e3e7
commit a470b836d5
  1. 3
      package.json
  2. 2
      scripts/server.js
  3. 5
      scripts/test-helper.js
  4. 18
      src/createJob.js
  5. 37
      src/createScheduler.js
  6. 74
      src/createWorker.js
  7. 3
      src/utils/getId.js
  8. 13
      src/worker-script/index.js
  9. 4
      src/worker/browser/defaultOptions.js
  10. 18
      src/worker/browser/defaultOptions.js~
  11. 4
      src/worker/browser/loadImage.js
  12. 33
      tests/constants.js
  13. 3
      tests/detect.test.html
  14. 46
      tests/detect.test.js
  15. 3
      tests/recognize.test.html
  16. 296
      tests/recognize.test.js
  17. 18
      tests/scheduler.test.html
  18. 35
      tests/scheduler.test.js

@ -10,13 +10,14 @@
"start": "node scripts/server.js",
"build": "rimraf dist && webpack --config scripts/webpack.config.prod.js",
"prepublishOnly": "npm run build",
"wait": "wait-on http://localhost:3000/package.json",
"wait": "rimraf dist && wait-on http://localhost:3000/dist/tesseract.dev.js",
"test": "npm-run-all -p -r start test:all",
"test:all": "npm-run-all wait test:browser:* test:node",
"test:node": "nyc mocha --exit --bail --require ./scripts/test-helper.js ./tests/*.test.js",
"test:browser-tpl": "mocha-headless-chrome -a incognito -a no-sandbox -a disable-setuid-sandbox -t 300000",
"test:browser:detect": "npm run test:browser-tpl -- -f ./tests/detect.test.html",
"test:browser:recognize": "npm run test:browser-tpl -- -f ./tests/recognize.test.html",
"test:browser:scheduler": "npm run test:browser-tpl -- -f ./tests/scheduler.test.html",
"lint": "eslint src",
"postinstall": "opencollective-postinstall || true"
},

@ -10,7 +10,7 @@ const app = express();
app.use(cors());
app.use('/', express.static(path.resolve(__dirname, '..')));
app.use(middleware(compiler, { publicPath: '/dist' }));
app.use(middleware(compiler, { publicPath: '/dist', writeToDisk: true }));
module.exports = app.listen(3000, () => {
console.log('Server is running on port 3000');

@ -1,4 +1,9 @@
const constants = require('../tests/constants');
global.expect = require('expect.js');
global.fs = require('fs');
global.path = require('path');
global.Tesseract = require('../src');
Object.keys(constants).forEach((key) => {
global[key] = constants[key];
});

@ -1,11 +1,17 @@
let jobCounter = 1;
const getId = require('./utils/getId');
module.exports = (
let jobCounter = 0;
module.exports = ({
id: _id,
action,
payload,
) => {
const id = `Job-${jobCounter}-${Math.random().toString(16).slice(3, 8)}`;
jobCounter += 1;
payload = {},
}) => {
let id = _id;
if (typeof id === 'undefined') {
id = getId('Job', jobCounter);
jobCounter += 1;
}
return {
id,

@ -1,8 +1,20 @@
const createJob = require('./createJob');
const log = require('./utils/log');
const getId = require('./utils/getId');
let schedulerCounter = 0;
module.exports = () => {
const id = getId('Scheduler', schedulerCounter);
const workers = {};
const runningWorkers = {};
let jobQueue = [];
schedulerCounter += 1;
const getQueueLen = () => jobQueue.length;
const getNumWorkers = () => Object.keys(workers).length;
const dequeue = () => {
if (jobQueue.length !== 0) {
const wIds = Object.keys(workers);
@ -17,11 +29,12 @@ module.exports = () => {
const queue = (action, payload) => (
new Promise((resolve, reject) => {
const job = createJob({ action, payload });
jobQueue.push(async (w) => {
jobQueue.shift();
runningWorkers[w.id] = true;
runningWorkers[w.id] = job;
try {
resolve(await w[action].apply(this, payload));
resolve(await w[action].apply(this, [...payload, job.id]));
} catch (err) {
reject(err);
} finally {
@ -29,22 +42,30 @@ module.exports = () => {
dequeue();
}
});
log(`[${id}]: add ${job.id} to JobQueue`);
log(`[${id}]: JobQueue length=${jobQueue.length}`);
dequeue();
})
);
const addWorker = (w) => {
workers[w.id] = w;
log(`[${id}]: add ${w.id}`);
log(`[${id}]: number of workers=${getNumWorkers()}`);
dequeue();
return w.id;
};
const addJob = (action, ...payload) => (
queue(action, payload)
);
const addJob = async (action, ...payload) => {
if (getNumWorkers() === 0) {
throw Error(`[${id}]: You need to have at least one worker before adding jobs`);
}
return queue(action, payload);
};
const terminate = async () => {
Object.keys(workers).forEach(async (id) => {
await workers[id].terminate();
Object.keys(workers).forEach(async (wid) => {
await workers[wid].terminate();
});
jobQueue = [];
};
@ -53,5 +74,7 @@ module.exports = () => {
addWorker,
addJob,
terminate,
getQueueLen,
getNumWorkers,
};
};

@ -2,6 +2,7 @@ const resolvePaths = require('./utils/resolvePaths');
const circularize = require('./utils/circularize');
const createJob = require('./createJob');
const log = require('./utils/log');
const getId = require('./utils/getId');
const { defaultOEM } = require('./constants/config');
const {
defaultOptions,
@ -12,11 +13,10 @@ const {
send,
} = require('./worker/node');
let workerCounter = 1;
let workerCounter = 0;
module.exports = (_options = {}) => {
const id = `Worker-${workerCounter}-${Math.random().toString(16).slice(3, 8)}`;
workerCounter += 1;
const id = getId('Worker', workerCounter);
const {
logger,
...options
@ -28,6 +28,8 @@ module.exports = (_options = {}) => {
const rejects = {};
let worker = spawnWorker(options);
workerCounter += 1;
const setResolve = (action, res) => {
resolves[action] = res;
};
@ -36,10 +38,9 @@ module.exports = (_options = {}) => {
rejects[action] = rej;
};
const startJob = (action, payload = {}) => (
const startJob = ({ id: jobId, action, payload }) => (
new Promise((resolve, reject) => {
const { id: jobId } = createJob(action, payload);
log(`[${id}]: Start ${jobId}, action=${action}`);
log(`[${id}]: Start ${jobId}, action=${action}, payload=`, payload);
setResolve(action, resolve);
setReject(action, reject);
send(worker, {
@ -51,32 +52,58 @@ module.exports = (_options = {}) => {
})
);
const load = () => (
startJob('load', { options })
const load = jobId => (
startJob(createJob({
id: jobId, action: 'load', payload: { options },
}))
);
const loadLanguage = (langs = 'eng') => (
startJob('loadLanguage', { langs, options })
const loadLanguage = (langs = 'eng', jobId) => (
startJob(createJob({
id: jobId,
action: 'loadLanguage',
payload: { langs, options },
}))
);
const initialize = (langs = 'eng', oem = defaultOEM) => (
startJob('initialize', { langs, oem })
const initialize = (langs = 'eng', oem = defaultOEM, jobId) => (
startJob(createJob({
id: jobId,
action: 'initialize',
payload: { langs, oem },
}))
);
const setParameters = (params = {}) => (
startJob('setParameters', { params })
const setParameters = (params = {}, jobId) => (
startJob(createJob({
id: jobId,
action: 'setParameters',
payload: { params },
}))
);
const recognize = async (image, opts = {}) => (
startJob('recognize', { image: await loadImage(image), options: opts })
const recognize = async (image, opts = {}, jobId) => (
startJob(createJob({
id: jobId,
action: 'recognize',
payload: { image: await loadImage(image), options: opts },
}))
);
const getPDF = (title = 'Tesseract OCR Result', textonly = false) => (
startJob('getPDF', { title, textonly })
const getPDF = (title = 'Tesseract OCR Result', textonly = false, jobId) => (
startJob(createJob({
id: jobId,
action: 'getPDF',
payload: { title, textonly },
}))
);
const detect = async image => (
startJob('detect', { image: await loadImage(image) })
const detect = async (image, jobId) => (
startJob(createJob({
id: jobId,
action: 'detect',
payload: { image: await loadImage(image) },
}))
);
const terminate = async () => {
@ -88,15 +115,18 @@ module.exports = (_options = {}) => {
return Promise.resolve();
};
onMessage(worker, ({ status, action, data }) => {
onMessage(worker, ({
workerId, jobId, status, action, data,
}) => {
if (status === 'resolve') {
log(`[${workerId}]: Complete ${jobId}, data=`, data);
let d = data;
if (action === 'recognize') {
d = circularize(data);
} else if (action === 'getPDF') {
d = Array.from({ ...data, length: Object.keys(data).length });
}
resolves[action](d);
resolves[action]({ jobId, data: d });
} else if (status === 'reject') {
rejects[action](data);
throw Error(data);

@ -0,0 +1,3 @@
module.exports = (prefix, cnt) => (
`${prefix}-${cnt}-${Math.random().toString(16).slice(3, 8)}`
);

@ -15,6 +15,7 @@ const dump = require('./utils/dump');
const isBrowser = require('../utils/getEnvironment')('type') === 'browser';
const setImage = require('./utils/setImage');
const defaultParams = require('./constants/defaultParams');
const log = require('../utils/log');
/*
* Tesseract Module returned by TesseractCore.
@ -23,7 +24,7 @@ let TessModule;
/*
* TessBaseAPI instance
*/
let api;
let api = null;
let latestJob;
let adapter = {};
let params = defaultParams;
@ -77,11 +78,13 @@ const loadLanguage = async ({
try {
const _data = await readCache(`${cachePath || '.'}/${lang}.traineddata`);
if (typeof _data !== 'undefined') {
log(`[${workerId}]: Load ${lang}.traineddata from cache`);
data = _data;
} else {
throw Error('Not found in cache');
}
} catch (e) {
log(`[${workerId}]: Load ${lang}.traineddata from ${langPath}`);
if (typeof _lang === 'string') {
let path = null;
@ -173,8 +176,12 @@ const initialize = ({
res.progress({
workerId, status: 'initializing api', progress: 0,
});
if (api !== null) {
api.End();
}
api = new TessModule.TessBaseAPI();
api.Init(null, langs, oem);
params = defaultParams;
setParameters({ payload: { params } });
res.progress({
workerId, status: 'initialized api', progress: 1,
@ -242,7 +249,9 @@ const detect = ({ payload: { image } }, res) => {
const terminate = (_, res) => {
try {
api.End();
if (api !== null) {
api.End();
}
res.resolve({ terminated: true });
} catch (err) {
res.reject(err.toString());

@ -1,5 +1,5 @@
const resolveURL = require('resolve-url');
const { version } = require('../../../package.json');
const { version, dependencies } = require('../../../package.json');
const defaultOptions = require('../../constants/defaultOptions');
/*
@ -14,5 +14,5 @@ module.exports = {
* If browser doesn't support WebAssembly,
* load ASM version instead
*/
corePath: `https://unpkg.com/tesseract.js-core@v2.0.0-beta.10/tesseract-core.${typeof WebAssembly === 'object' ? 'wasm' : 'asm'}.js`,
corePath: `https://unpkg.com/tesseract.js-core@v${dependencies['tesseract.js-core'].substring(1)}/tesseract-core.${typeof WebAssembly === 'object' ? 'wasm' : 'asm'}.js`,
};

@ -0,0 +1,18 @@
const resolveURL = require('resolve-url');
const { version, dependencies } = require('../../../package.json');
const defaultOptions = require('../../constants/defaultOptions');
/*
* Default options for browser worker
*/
module.exports = {
...defaultOptions,
workerPath: (typeof process !== 'undefined' && process.env.TESS_ENV === 'development')
? resolveURL(`/dist/worker.dev.js?nocache=${Math.random().toString(36).slice(3)}`)
: `https://unpkg.com/tesseract.js@v${version}/dist/worker.min.js`,
/*
* If browser doesn't support WebAssembly,
* load ASM version instead
*/
corePath: `https://unpkg.com/tesseract.js-core@v${dependencies['tesseract.js-core'].substring(1)}/tesseract-core.${typeof WebAssembly === 'object' ? 'wasm' : 'asm'}.js`,
};

@ -56,10 +56,10 @@ const loadImage = async (image) => {
}
} else if (image instanceof HTMLElement) {
if (image.tagName === 'IMG') {
data = loadImage(image.src);
data = await loadImage(image.src);
}
if (image.tagName === 'VIDEO') {
data = loadImage(image.poster);
data = await loadImage(image.poster);
}
if (image.tagName === 'CANVAS') {
await new Promise((resolve) => {

File diff suppressed because one or more lines are too long

@ -7,7 +7,8 @@
<div id="mocha"></div>
<script src="../node_modules/mocha/mocha.js"></script>
<script src="../node_modules/expect.js/index.js"></script>
<script src="http://localhost:3000/dist/tesseract.dev.js"></script>
<script src="../dist/tesseract.dev.js"></script>
<script src="./constants.js"></script>
<script>mocha.setup('bdd');</script>
<script src="./detect.test.js"></script>
<script>

@ -1,33 +1,21 @@
const { TesseractWorker } = Tesseract;
const isBrowser = typeof window !== 'undefined' && typeof window.document !== 'undefined';
const IMAGE_PATH = 'http://localhost:3000/tests/assets/images';
const loadLangOptions = {
langPath: 'http://localhost:3000/tests/assets/traineddata',
cachePath: './tests/assets/traineddata',
};
const getWorker = options => (
new TesseractWorker({
cacheMethod: 'readOnly',
...(isBrowser ? { workerPath: 'http://localhost:3000/dist/worker.dev.js' } : {}),
...loadLangOptions,
...options,
})
);
const { createScheduler, createWorker } = Tesseract;
const scheduler = createScheduler();
const worker = createWorker(OPTIONS);
scheduler.addWorker(worker);
before(function cb() {
this.timeout(0);
return worker.load();
});
describe('detect()', () => {
it('should detect OSD', (done) => {
describe('detect()', async () => {
it('should detect OSD', () => {
[
{ name: 'cosmic.png', ans: { id: 12, degree: 0 } },
].forEach(({ name, ans: { id, degree } }) => {
const worker = getWorker();
worker
.detect(`${IMAGE_PATH}/${name}`)
.then(({ tesseract_script_id, orientation_degrees }) => {
expect(tesseract_script_id).to.be(id);
expect(orientation_degrees).to.be(degree);
done();
});
{ name: 'cosmic.png', ans: { script: 'Latin' } },
].forEach(async ({ name, ans: { script } }) => {
await worker.loadLanguage('osd');
await worker.initialize('osd');
const { data: { script: s } } = await scheduler.addJob('detect', `${IMAGE_PATH}/${name}`);
expect(s).to.be(script);
});
}).timeout(10000);
}).timeout(TIMEOUT);
});

@ -7,7 +7,8 @@
<div id="mocha"></div>
<script src="../node_modules/mocha/mocha.js"></script>
<script src="../node_modules/expect.js/index.js"></script>
<script src="http://localhost:3000/dist/tesseract.dev.js"></script>
<script src="../dist/tesseract.dev.js"></script>
<script src="./constants.js"></script>
<script>mocha.setup('bdd');</script>
<script src="./recognize.test.js"></script>
<script>

File diff suppressed because one or more lines are too long

@ -0,0 +1,18 @@
<html>
<head>
<meta charset="utf-8">
<link rel="stylesheet" href="../node_modules/mocha/mocha.css">
</head>
<body>
<div id="mocha"></div>
<script src="../node_modules/mocha/mocha.js"></script>
<script src="../node_modules/expect.js/index.js"></script>
<script src="../dist/tesseract.dev.js"></script>
<script src="./constants.js"></script>
<script>mocha.setup('bdd');</script>
<script src="./scheduler.test.js"></script>
<script>
mocha.run();
</script>
</body>
</html>

@ -0,0 +1,35 @@
const { createScheduler, createWorker } = Tesseract;

// Pool of ready-to-use workers shared by every test case below.
let workers = [];

/*
 * Create and fully initialize the worker pool once, up front, so that the
 * individual test cases only measure scheduling and recognition.
 * The timeout is disabled for this hook.
 */
before(async function cb() {
  this.timeout(0);
  const NUM_WORKERS = 10;
  console.log(`Initializing ${NUM_WORKERS} workers`);
  workers = await Promise.all(Array(NUM_WORKERS).fill(0).map(async () => {
    const w = createWorker(OPTIONS);
    await w.load();
    await w.loadLanguage('eng');
    await w.initialize('eng');
    return w;
  }));
  console.log(`Initialized ${NUM_WORKERS} workers`);
});

/*
 * Release the pool again; without this the workers created above are never
 * terminated.
 */
after(async function cb() {
  this.timeout(0);
  await Promise.all(workers.map(w => w.terminate()));
  workers = [];
});

describe('scheduler', () => {
  // Number of recognize jobs queued per test case; also used in the title
  // below so the description cannot drift from the actual workload.
  const NUM_JOBS = 30;

  describe(`should speed up with more workers (running ${NUM_JOBS} jobs)`, () => {
    // One test case per pool size, from 1 worker up to all 10 created in before().
    Array(10).fill(0).forEach((_, num) => (
      it(`support using ${num + 1} workers`, async () => {
        const scheduler = createScheduler();
        workers.slice(0, num + 1).forEach((w) => {
          scheduler.addWorker(w);
        });
        // Alternate between the two sample images so the jobs are not all identical.
        const rets = await Promise.all(Array(NUM_JOBS).fill(0).map((__, idx) => (
          scheduler.addJob('recognize', `${IMAGE_PATH}/${idx % 2 === 0 ? 'simple' : 'cosmic'}.png`)
        )));
        expect(rets.length).to.be(NUM_JOBS);
      }).timeout(60000)
    ));
  });
});
Loading…
Cancel
Save