Add ability to analyse layout without running recognition (#770)

master
Balearica 1 year ago committed by GitHub
parent 15fdd9e3ac
commit 6437f28704
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
  1. 1
      src/index.d.ts
  2. 2
      src/worker-script/constants/defaultOutput.js
  3. 14
      src/worker-script/index.js
  4. 38
      src/worker-script/utils/dump.js

1
src/index.d.ts vendored

@ -70,6 +70,7 @@ declare namespace Tesseract {
interface OutputFormats {
text: boolean;
blocks: boolean;
layoutBlocks: boolean;
hocr: boolean;
tsv: boolean;
box: boolean;

@ -5,6 +5,7 @@
module.exports = {
text: true,
blocks: true,
layoutBlocks: false,
hocr: true,
tsv: true,
box: false,
@ -14,4 +15,5 @@ module.exports = {
imageColor: false,
imageGrey: false,
imageBinary: false,
debug: false,
};

@ -255,7 +255,7 @@ const processOutput = (output) => {
if (params.tessjs_create_tsv === '1') workingOutput.tsv = true;
if (params.tessjs_create_unlv === '1') workingOutput.unlv = true;
const nonRecOutputs = ['imageColor', 'imageGrey', 'imageBinary'];
const nonRecOutputs = ['imageColor', 'imageGrey', 'imageBinary', 'layoutBlocks'];
let recOutputCount = 0;
for (const prop of Object.keys(output)) {
workingOutput[prop] = output[prop];
@ -267,7 +267,8 @@ const processOutput = (output) => {
}
}
}
return { workingOutput, recOutputCount };
const skipRecognition = recOutputCount === 0;
return { workingOutput, skipRecognition };
};
// List of options for Tesseract.js (rather than passed through to Tesseract),
@ -302,7 +303,7 @@ const recognize = async ({
}
}
const { workingOutput, recOutputCount } = processOutput(output);
const { workingOutput, skipRecognition } = processOutput(output);
// When the auto-rotate option is true, setImage is first called with no angle;
// Tesseract then calculates the angle and setImage is called again.
@ -352,14 +353,17 @@ const recognize = async ({
api.SetRectangle(rec.left, rec.top, rec.width, rec.height);
}
if (recOutputCount > 0) {
if (!skipRecognition) {
api.Recognize(null);
} else {
if (output.layoutBlocks) {
api.AnalyseLayout();
}
log('Skipping recognition: all output options requiring recognition are disabled.');
}
const { pdfTitle } = options;
const { pdfTextOnly } = options;
const result = dump(TessModule, api, workingOutput, { pdfTitle, pdfTextOnly });
const result = dump(TessModule, api, workingOutput, { pdfTitle, pdfTextOnly, skipRecognition });
result.rotateRadians = rotateRadiansFinal;
if (output.debug) TessModule.FS.unlink('/debugInternal.txt');

@ -79,7 +79,10 @@ module.exports = (TessModule, api, output, options) => {
return TessModule.FS.readFile('/tesseract-ocr.pdf');
};
if (output.blocks) {
// If output.layoutBlocks is true and options.skipRecognition is true,
// the user wants layout data but text recognition has not been run.
// In this case, fields that require text recognition (text and confidence)
// are set to null rather than read from the iterator.
if (output.blocks || output.layoutBlocks) {
ri.Begin();
do {
if (ri.IsAtBeginningOf(RIL_BLOCK)) {
@ -102,8 +105,8 @@ module.exports = (TessModule, api, output, options) => {
block = {
paragraphs: [],
text: ri.GetUTF8Text(RIL_BLOCK),
confidence: ri.Confidence(RIL_BLOCK),
text: !options.skipRecognition ? ri.GetUTF8Text(RIL_BLOCK) : null,
confidence: !options.skipRecognition ? ri.Confidence(RIL_BLOCK) : null,
baseline: ri.getBaseline(RIL_BLOCK),
bbox: ri.getBoundingBox(RIL_BLOCK),
blocktype: enumToString(ri.BlockType(), 'PT'),
@ -114,8 +117,8 @@ module.exports = (TessModule, api, output, options) => {
if (ri.IsAtBeginningOf(RIL_PARA)) {
para = {
lines: [],
text: ri.GetUTF8Text(RIL_PARA),
confidence: ri.Confidence(RIL_PARA),
text: !options.skipRecognition ? ri.GetUTF8Text(RIL_PARA) : null,
confidence: !options.skipRecognition ? ri.Confidence(RIL_PARA) : null,
baseline: ri.getBaseline(RIL_PARA),
bbox: ri.getBoundingBox(RIL_PARA),
is_ltr: !!ri.ParagraphIsLtr(),
@ -125,8 +128,8 @@ module.exports = (TessModule, api, output, options) => {
if (ri.IsAtBeginningOf(RIL_TEXTLINE)) {
textline = {
words: [],
text: ri.GetUTF8Text(RIL_TEXTLINE),
confidence: ri.Confidence(RIL_TEXTLINE),
text: !options.skipRecognition ? ri.GetUTF8Text(RIL_TEXTLINE) : null,
confidence: !options.skipRecognition ? ri.Confidence(RIL_TEXTLINE) : null,
baseline: ri.getBaseline(RIL_TEXTLINE),
bbox: ri.getBoundingBox(RIL_TEXTLINE),
};
@ -139,8 +142,8 @@ module.exports = (TessModule, api, output, options) => {
symbols: [],
choices: [],
text: ri.GetUTF8Text(RIL_WORD),
confidence: ri.Confidence(RIL_WORD),
text: !options.skipRecognition ? ri.GetUTF8Text(RIL_WORD) : null,
confidence: !options.skipRecognition ? ri.Confidence(RIL_WORD) : null,
baseline: ri.getBaseline(RIL_WORD),
bbox: ri.getBoundingBox(RIL_WORD),
@ -162,8 +165,8 @@ module.exports = (TessModule, api, output, options) => {
const wc = new TessModule.WordChoiceIterator(ri);
do {
word.choices.push({
text: wc.GetUTF8Text(),
confidence: wc.Confidence(),
text: !options.skipRecognition ? wc.GetUTF8Text() : null,
confidence: !options.skipRecognition ? wc.Confidence() : null,
});
} while (wc.Next());
TessModule.destroy(wc);
@ -179,8 +182,8 @@ module.exports = (TessModule, api, output, options) => {
symbol = {
choices: [],
image: null,
text: ri.GetUTF8Text(RIL_SYMBOL),
confidence: ri.Confidence(RIL_SYMBOL),
text: !options.skipRecognition ? ri.GetUTF8Text(RIL_SYMBOL) : null,
confidence: !options.skipRecognition ? ri.Confidence(RIL_SYMBOL) : null,
baseline: ri.getBaseline(RIL_SYMBOL),
bbox: ri.getBoundingBox(RIL_SYMBOL),
is_superscript: !!ri.SymbolIsSuperscript(),
@ -191,8 +194,8 @@ module.exports = (TessModule, api, output, options) => {
const ci = new TessModule.ChoiceIterator(ri);
do {
symbol.choices.push({
text: ci.GetUTF8Text(),
confidence: ci.Confidence(),
text: !options.skipRecognition ? ci.GetUTF8Text() : null,
confidence: !options.skipRecognition ? ci.Confidence() : null,
});
} while (ci.Next());
// TessModule.destroy(i);
@ -212,8 +215,9 @@ module.exports = (TessModule, api, output, options) => {
imageColor: output.imageColor ? getImage(imageType.COLOR) : null,
imageGrey: output.imageGrey ? getImage(imageType.GREY) : null,
imageBinary: output.imageBinary ? getImage(imageType.BINARY) : null,
confidence: api.MeanTextConf(),
blocks: output.blocks ? blocks : null,
confidence: !options.skipRecognition ? api.MeanTextConf() : null,
blocks: output.blocks && !options.skipRecognition ? blocks : null,
layoutBlocks: output.layoutBlocks && options.skipRecognition ? blocks : null,
psm: enumToString(api.GetPageSegMode(), 'PSM'),
oem: enumToString(api.oem(), 'OEM'),
version: api.Version(),

Loading…
Cancel
Save