You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
54 lines
1.9 KiB
54 lines
1.9 KiB
<html>
|
|
<head>
|
|
<script src="/dist/tesseract.dev.js"></script>
|
|
</head>
|
|
<body>
|
|
<div>
|
|
<input type="file" id="uploader">
|
|
<button id="download-pdf" disabled="true">Download PDF</button>
|
|
</div>
|
|
<textarea id="board" readonly rows="8" cols="80">Upload an image file</textarea>
|
|
<script type="module">
|
|
// Controls declared in the page markup: the file picker and the PDF button.
const uploader = document.getElementById('uploader');
const dlBtn = document.getElementById('download-pdf');

// PDF payload of the most recent recognition; assigned by `recognize` and
// read by `downloadPDF`. Stays undefined until the first result arrives.
let pdf;

// Worker factory taken from the `Tesseract` global (presumably provided by
// the tesseract.dev.js script included in <head>).
const createWorker = Tesseract.createWorker;

// Options handed to the worker factory: where to find the core and worker
// scripts, plus a logger that echoes every message to the console.
const workerOptions = {
  corePath: '/node_modules/tesseract.js-core',
  workerPath: "/dist/worker.dev.js",
  logger: (message) => {
    console.log(message);
  },
};

// Single worker shared by every upload handled on this page.
const worker = await createWorker(workerOptions);
|
|
/**
 * Promise for the one-time English language load and engine initialisation.
 * Undefined until the first upload; cleared again if that setup fails so a
 * later upload can retry it.
 */
let englishReady;

/**
 * Loads and initialises the 'eng' language on the shared worker at most once.
 *
 * @returns {Promise<void>} Settles when both worker calls have completed.
 */
const ensureEnglishReady = () => {
  englishReady ??= (async () => {
    await worker.loadLanguage('eng');
    await worker.initialize('eng');
  })().catch((err) => {
    // Forget the failed attempt so the next upload tries again.
    englishReady = undefined;
    throw err;
  });
  return englishReady;
};

/**
 * `change` handler for the file input: runs recognition on the first selected
 * file, writes the recognised text into the `#board` textarea, stores the PDF
 * payload in `pdf` and enables the download button.
 *
 * Does nothing when the selection is empty. On failure the error is logged,
 * a message is shown in the textarea and the download button stays disabled.
 *
 * @param {Event} event - Change event whose `target.files` holds the selection.
 * @returns {Promise<void>}
 */
const recognize = async ({ target: { files } }) => {
  const image = files[0];
  if (!image) {
    // Empty selection: there is nothing to recognise.
    return;
  }

  const board = document.getElementById('board');
  // Any previously stored PDF belongs to an older image, so block downloads
  // until the new result is ready.
  dlBtn.disabled = true;

  try {
    await ensureEnglishReady();
    const { data } = await worker.recognize(
      image,
      { pdfTitle: 'Example PDF' },
      { pdf: true },
    );
    pdf = data.pdf;
    board.value = data.text;
    dlBtn.disabled = false;
  } catch (err) {
    console.error(err);
    board.value = `Recognition failed: ${err?.message ?? err}`;
  }
};
|
|
// How long to keep the temporary object URL alive after triggering the
// download, in milliseconds, before releasing it.
const OBJECT_URL_REVOKE_DELAY_MS = 10000;

/**
 * `click` handler for the download button: wraps the stored `pdf` bytes in a
 * Blob and saves it as 'tesseract-ocr-result.pdf'.
 *
 * Uses `navigator.msSaveBlob` when it exists; otherwise clicks a temporary,
 * hidden `<a download>` element pointing at an object URL. Does nothing when
 * no PDF has been stored yet or when the anchor has no `download` property.
 *
 * @returns {Promise<void>}
 */
const downloadPDF = async () => {
  if (!pdf) {
    // No recognition result yet, so there is nothing to save.
    return;
  }

  const filename = 'tesseract-ocr-result.pdf';
  const blob = new Blob([new Uint8Array(pdf)], { type: 'application/pdf' });

  if (navigator.msSaveBlob) {
    // IE 10+
    navigator.msSaveBlob(blob, filename);
    return;
  }

  const link = document.createElement('a');
  if (link.download === undefined) {
    return;
  }

  const url = URL.createObjectURL(blob);
  link.setAttribute('href', url);
  link.setAttribute('download', filename);
  link.style.visibility = 'hidden';
  document.body.appendChild(link);
  try {
    link.click();
  } finally {
    document.body.removeChild(link);
    // Release the blob URL so repeated downloads do not accumulate them;
    // deferred to give the browser time to start the download first.
    setTimeout(() => URL.revokeObjectURL(url), OBJECT_URL_REVOKE_DELAY_MS);
  }
};
|
|
// Hook the page controls up to their handlers: choosing a file starts
// recognition, clicking the button saves the resulting PDF.
const handlerBindings = [
  [uploader, 'change', recognize],
  [dlBtn, 'click', downloadPDF],
];
for (const [element, eventName, handler] of handlerBindings) {
  element.addEventListener(eventName, handler);
}
|
|
</script>
|
|
</body>
|
|
</html>
|
|
|