joermd commited on
Commit
91e5a7d
·
verified ·
1 Parent(s): 211964f

Update ocr.html

Browse files
Files changed (1) hide show
  1. ocr.html +514 -0
ocr.html CHANGED
@@ -0,0 +1,514 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <!DOCTYPE html>
2
+ <html lang="en">
3
+ <head>
4
+ <title>UFastPro OCR Technology</title>
5
+ <meta charset="UTF-8">
6
+ <meta name="viewport" content="width=device-width, initial-scale=1">
7
+ <link rel="stylesheet" href="https://unpkg.com/@picocss/pico@latest/css/pico.min.css">
8
+ <!-- PDF.js library -->
9
+ <script src="https://cdnjs.cloudflare.com/ajax/libs/pdf.js/3.4.120/pdf.min.js"></script>
10
+ <style>
11
+ :root {
12
+ --primary: #4361ee;
13
+ --primary-hover: #3a56d4;
14
+ --secondary: #4cc9f0;
15
+ --secondary-hover: #3db8dd;
16
+ }
17
+
18
+ body {
19
+ background-color: #f8f9fa;
20
+ }
21
+
22
+ .container {
23
+ max-width: 900px;
24
+ }
25
+
26
+ header {
27
+ margin-bottom: 2rem;
28
+ padding: 1rem 0;
29
+ background: linear-gradient(135deg, var(--primary), var(--secondary));
30
+ color: white;
31
+ border-radius: 0 0 10px 10px;
32
+ box-shadow: 0 4px 6px rgba(0, 0, 0, 0.1);
33
+ }
34
+
35
+ header h1 {
36
+ margin: 0;
37
+ font-weight: bold;
38
+ }
39
+
40
+ header p {
41
+ margin: 0.5rem 0 0 0;
42
+ opacity: 0.9;
43
+ }
44
+
45
+ article {
46
+ margin: 2rem 0;
47
+ padding: 2rem;
48
+ background-color: white;
49
+ border-radius: 10px;
50
+ box-shadow: 0 2px 4px rgba(0, 0, 0, 0.05);
51
+ }
52
+
53
+ .progress-container {
54
+ width: 100%;
55
+ margin: 20px 0;
56
+ }
57
+
58
+ .progress-bar {
59
+ height: 20px;
60
+ background: linear-gradient(to right, var(--primary), var(--secondary));
61
+ width: 0%;
62
+ border-radius: 5px;
63
+ transition: width 0.3s;
64
+ box-shadow: 0 2px 4px rgba(0, 0, 0, 0.1);
65
+ }
66
+
67
+ .page-result {
68
+ margin-bottom: 20px;
69
+ border: 1px solid #e9ecef;
70
+ padding: 20px;
71
+ border-radius: 8px;
72
+ background-color: white;
73
+ box-shadow: 0 2px 4px rgba(0, 0, 0, 0.05);
74
+ transition: transform 0.2s;
75
+ }
76
+
77
+ .page-result:hover {
78
+ transform: translateY(-2px);
79
+ box-shadow: 0 4px 8px rgba(0, 0, 0, 0.1);
80
+ }
81
+
82
+ .page-result h4 {
83
+ margin-top: 0;
84
+ color: var(--primary);
85
+ border-bottom: 2px solid #f1f3f5;
86
+ padding-bottom: 0.5rem;
87
+ }
88
+
89
+ #resultsContainer {
90
+ max-height: 600px;
91
+ overflow-y: auto;
92
+ margin-top: 20px;
93
+ padding-right: 10px;
94
+ }
95
+
96
+ .status-text {
97
+ text-align: center;
98
+ margin: 10px 0;
99
+ font-weight: 500;
100
+ color: #495057;
101
+ }
102
+
103
+ button {
104
+ background-color: var(--primary);
105
+ transition: all 0.2s;
106
+ }
107
+
108
+ button:hover {
109
+ background-color: var(--primary-hover);
110
+ transform: translateY(-2px);
111
+ }
112
+
113
+ #cancelProcessingBtn {
114
+ background-color: #e63946;
115
+ }
116
+
117
+ #cancelProcessingBtn:hover {
118
+ background-color: #d62828;
119
+ }
120
+
121
+ pre {
122
+ background-color: #f8f9fa;
123
+ border-radius: 5px;
124
+ padding: 10px;
125
+ font-size: 0.9rem;
126
+ }
127
+
128
+ details summary {
129
+ cursor: pointer;
130
+ color: var(--primary);
131
+ font-weight: 500;
132
+ }
133
+
134
+ footer {
135
+ text-align: center;
136
+ margin-top: 2rem;
137
+ padding: 1rem 0;
138
+ color: #6c757d;
139
+ font-size: 0.9rem;
140
+ }
141
+
142
+ .logo {
143
+ font-weight: bold;
144
+ font-size: 1.2rem;
145
+ color: white;
146
+ text-decoration: none;
147
+ }
148
+
149
+ .logo span {
150
+ color: var(--secondary);
151
+ }
152
+
153
+ .upload-container {
154
+ border: 2px dashed #ced4da;
155
+ border-radius: 8px;
156
+ padding: 2rem;
157
+ text-align: center;
158
+ transition: all 0.2s;
159
+ cursor: pointer;
160
+ margin-bottom: 1rem;
161
+ }
162
+
163
+ .upload-container:hover {
164
+ border-color: var(--primary);
165
+ background-color: rgba(67, 97, 238, 0.05);
166
+ }
167
+
168
+ .upload-icon {
169
+ font-size: 2rem;
170
+ color: var(--primary);
171
+ margin-bottom: 1rem;
172
+ }
173
+
174
+ .export-button {
175
+ display: none;
176
+ margin-top: 20px;
177
+ }
178
+
179
+ @media (max-width: 768px) {
180
+ article {
181
+ padding: 1rem;
182
+ }
183
+ }
184
+ </style>
185
+ </head>
186
+ <body>
187
<!-- Page banner: the product name is the document's single top-level heading, followed by a tagline. -->
<header class="container">
  <div style="text-align: center;">
    <h1 class="logo">UFast<span>Pro</span> OCR Technology</h1>
    <p>Advanced PDF Text Recognition System</p>
  </div>
</header>
193
+
194
<main class="container">
  <article>
    <h3 style="text-align: center; margin-top: 0;">PDF Text Extraction</h3>
    <p style="text-align: center;">Upload any PDF document to extract text from all pages using our advanced OCR technology.</p>

    <!-- Drop zone: the page script opens the hidden file input when this area is clicked. -->
    <div class="upload-container" id="uploadContainer">
      <div class="upload-icon" aria-hidden="true">📄</div>
      <p>Drag &amp; drop your PDF here or click to browse</p>
      <input type="file" id="pdfFile" name="pdfFile" accept=".pdf" style="display: none;">
    </div>

    <!-- Progress read-out: role="status" lets screen readers announce the text as the script updates it. -->
    <div class="progress-container" id="progressContainer" hidden>
      <div class="progress-bar" id="progressBar"></div>
      <p class="status-text" id="statusText" role="status">Ready to process</p>
    </div>

    <!-- Explicit type="button" keeps these from ever acting as submit buttons. -->
    <div style="display: flex; gap: 10px; justify-content: center;">
      <button type="button" id="startProcessingBtn" disabled>Start Processing</button>
      <button type="button" id="cancelProcessingBtn" hidden>Cancel Processing</button>
    </div>

    <button type="button" id="exportAllTextBtn" class="export-button">Export All Text</button>
  </article>

  <!-- Per-page OCR results are appended here by the page script. -->
  <div id="resultsContainer"></div>
</main>
220
+
221
+ <footer class="container">
222
+ <p>&copy; 2025 UFastPro OCR Technology. All rights reserved.</p>
223
+ </footer>
224
+
225
+ <script>
226
+ // Set up PDF.js worker
227
+ pdfjsLib.GlobalWorkerOptions.workerSrc = 'https://cdnjs.cloudflare.com/ajax/libs/pdf.js/3.4.120/pdf.worker.min.js';
228
+
229
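+ // API keys pool. SECURITY: these RapidAPI keys are shipped in the page source, so any visitor can read and reuse them; rotate them and route OCR requests through a server-side proxy instead.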
230
+ const API_KEYS = [
231
+ '32769fb369mshfdf6f5e28e26674p1f3764jsn2a31085a1fc7',
232
+ '091dbfbf0emsh6606eb2165cfd97p191257jsnd017aa229e09',
233
+ '2175cafd75msh4465ea00b022145p1e062ajsn05e78cebc495',
234
+ 'eb11693cddmshb8bd157e05b74acp1f6aa4jsn4369fa546e55',
235
+ 'e9c1cd0db3mshb54648cf3400243p1d0a2bjsnf0d54e43fca2',
236
+ 'fdb95540bamsh786e3ec8083bb22p1b4c1ajsn44fecbb812bb'
237
+ ];
238
+
239
+ // Get random API key from the pool
240
/**
 * Pick one entry of the API_KEYS pool at random.
 *
 * @returns {string} The key found at a randomly chosen index of API_KEYS.
 */
function getRandomApiKey() {
    const poolSize = API_KEYS.length;
    const pick = Math.floor(Math.random() * poolSize);
    return API_KEYS[pick];
}
244
+
245
+ const RAPIDAPI_HOST = 'ocr43.p.rapidapi.com';
246
+ const API_URL = 'https://ocr43.p.rapidapi.com/v1/results';
247
+
248
+ document.addEventListener('DOMContentLoaded', function() {
249
+ const pdfFileInput = document.getElementById('pdfFile');
250
+ const uploadContainer = document.getElementById('uploadContainer');
251
+ const startProcessingBtn = document.getElementById('startProcessingBtn');
252
+ const cancelProcessingBtn = document.getElementById('cancelProcessingBtn');
253
+ const progressContainer = document.getElementById('progressContainer');
254
+ const progressBar = document.getElementById('progressBar');
255
+ const statusText = document.getElementById('statusText');
256
+ const resultsContainer = document.getElementById('resultsContainer');
257
+ const exportAllTextBtn = document.getElementById('exportAllTextBtn');
258
+
259
+ let pdfDocument = null;
260
+ let isProcessing = false;
261
+ let shouldCancel = false;
262
+ let allExtractedText = [];
263
+
264
+ // Event listeners
265
// Toggle the drag-over highlight on the drop zone. Clearing resets the inline
// styles to '' (instead of a fixed colour) so the stylesheet's normal and
// :hover rules apply again once the drag is over.
const setDropHighlight = (active) => {
    uploadContainer.style.borderColor = active ? 'var(--primary)' : '';
    uploadContainer.style.backgroundColor = active ? 'rgba(67, 97, 238, 0.05)' : '';
};

// File chosen through the native picker.
pdfFileInput.addEventListener('change', handlePdfSelection);

// Clicking anywhere in the drop zone opens the hidden file input.
uploadContainer.addEventListener('click', () => pdfFileInput.click());

// preventDefault on dragover is what marks the element as a valid drop target.
uploadContainer.addEventListener('dragover', (e) => {
    e.preventDefault();
    setDropHighlight(true);
});
uploadContainer.addEventListener('dragleave', (e) => {
    e.preventDefault();
    setDropHighlight(false);
});
uploadContainer.addEventListener('drop', (e) => {
    e.preventDefault();
    setDropHighlight(false);

    const droppedFiles = e.dataTransfer.files;
    if (droppedFiles.length) {
        // Mirror the dropped files on the input, then run the same handler the
        // picker uses, passing an object shaped like a change event.
        pdfFileInput.files = droppedFiles;
        handlePdfSelection({ target: { files: droppedFiles } });
    }
});

// Action buttons.
startProcessingBtn.addEventListener('click', startProcessing);
cancelProcessingBtn.addEventListener('click', cancelProcessing);
exportAllTextBtn.addEventListener('click', exportAllText);
290
+
291
+ // Handle PDF file selection
292
/**
 * Load the PDF the user picked (or dropped) and arm the "Start Processing" button.
 *
 * @param {{target: {files: (FileList|File[])}}} event A file-input change event,
 *     or an object of the same shape (the drop handler passes one).
 * @returns {Promise<void>} Resolves once the PDF is loaded or the attempt has
 *     been reported to the user; load errors are caught here, not rethrown.
 */
async function handlePdfSelection(event) {
    const file = event.target.files[0];

    // Nothing selected (e.g. the picker was dismissed): not an error, so no alert.
    if (!file) {
        return;
    }

    // Swapping the document while pages are being processed would change it
    // underneath the running loop.
    if (isProcessing) {
        alert('Please wait for the current document to finish processing, or cancel it first.');
        return;
    }

    // Some platforms report an empty MIME type for PDFs; fall back to the extension.
    const isPdf = file.type === 'application/pdf' ||
        (file.type === '' && /\.pdf$/i.test(file.name));
    if (!isPdf) {
        alert('Please select a valid PDF file.');
        return;
    }

    // Disarm the previous document so a failed load cannot leave a stale one
    // behind an enabled Start button.
    startProcessingBtn.disabled = true;
    const previousDocument = pdfDocument;
    pdfDocument = null;
    if (previousDocument) {
        // Best-effort release of the old document; a failure here does not
        // affect loading the new one.
        previousDocument.destroy().catch((err) => {
            console.error('Error releasing previous PDF:', err);
        });
    }

    try {
        // Update UI to show loading
        statusText.textContent = 'Loading PDF...';
        progressContainer.hidden = false;
        progressBar.style.width = '10%';

        // Load the PDF file
        const arrayBuffer = await file.arrayBuffer();
        pdfDocument = await pdfjsLib.getDocument({ data: arrayBuffer }).promise;

        // Enable the start button and update status
        startProcessingBtn.disabled = false;
        statusText.textContent = `PDF loaded with ${pdfDocument.numPages} pages. Click "Start Processing" to begin.`;
        progressBar.style.width = '20%';
    } catch (error) {
        console.error('Error loading PDF:', error);
        alert('Error loading PDF file. Please try again.');
        progressContainer.hidden = true;
    }
}
319
+
320
+ // Start processing the PDF
321
/**
 * Run OCR over every page of the loaded PDF, one page at a time.
 *
 * Does nothing when no document is loaded or a run is already in progress.
 * A failure on one page is recorded as that page's result and the run
 * continues with the next page. The export button is shown only when the run
 * finishes without being cancelled.
 *
 * @returns {Promise<void>}
 */
async function startProcessing() {
    if (!pdfDocument || isProcessing) return;

    isProcessing = true;
    shouldCancel = false;
    startProcessingBtn.hidden = true;
    cancelProcessingBtn.hidden = false;
    resultsContainer.innerHTML = '';
    allExtractedText = [];
    exportAllTextBtn.style.display = 'none';

    const totalPages = pdfDocument.numPages;

    try {
        // The cancel flag is re-checked before each page; a page already in
        // flight is allowed to finish.
        for (let pageNum = 1; pageNum <= totalPages && !shouldCancel; pageNum++) {
            updateProgress(pageNum, totalPages);

            try {
                await processPage(pageNum, totalPages);
            } catch (error) {
                console.error(`Error processing page ${pageNum}:`, error);
                addPageResult(pageNum, `Error: ${error.message}`, null);
                allExtractedText.push(`[PAGE ${pageNum}]\nError: ${error.message}\n`);
            }
        }

        if (shouldCancel) {
            // Set here rather than inside the loop so a cancel requested during
            // the final page does not leave the status stuck on "Cancelling...".
            statusText.textContent = 'Processing cancelled.';
        } else {
            statusText.textContent = 'Processing complete!';
            progressBar.style.width = '100%';
            exportAllTextBtn.style.display = 'block';
        }
    } finally {
        // Always hand the controls back, even if something unexpected threw.
        isProcessing = false;
        startProcessingBtn.hidden = false;
        cancelProcessingBtn.hidden = true;
    }
}
364
+
365
+ // Cancel the processing
366
/**
 * Ask the running page loop to stop.
 *
 * Only raises the shared flag and updates the status line; the loop in
 * startProcessing checks the flag before it begins each page.
 */
function cancelProcessing() {
    statusText.textContent = 'Cancelling...';
    shouldCancel = true;
}
370
+
371
+ // Update progress bar and status text
372
/**
 * Reflect the current page in the progress bar and the status line.
 *
 * The bar starts at 20% (the width set once the PDF has loaded) and the
 * remaining 80% is scaled by current / total.
 *
 * @param {number} current Page number being processed.
 * @param {number} total Total number of pages.
 */
function updateProgress(current, total) {
    const fractionDone = current / total;
    const barWidth = 20 + (fractionDone * 80);

    statusText.textContent = `Processing page ${current} of ${total}`;
    progressBar.style.width = `${barWidth}%`;
}
377
+
378
+ // Export all extracted text
379
/**
 * Download everything collected in allExtractedText as "extracted_text.txt".
 *
 * Entries are joined with a blank line between them. Does nothing when no
 * text has been collected.
 */
function exportAllText() {
    if (!allExtractedText.length) {
        return;
    }

    const fileBlob = new Blob([allExtractedText.join('\n\n')], { type: 'text/plain' });
    const objectUrl = URL.createObjectURL(fileBlob);

    // A temporary link is attached, clicked and detached to trigger the download.
    const link = document.createElement('a');
    link.download = 'extracted_text.txt';
    link.href = objectUrl;

    document.body.appendChild(link);
    link.click();
    link.remove();

    URL.revokeObjectURL(objectUrl);
}
394
+
395
+ // Process a single page
396
/**
 * OCR a single page: render it to an off-screen canvas, upload the PNG to the
 * OCR endpoint, then record and display the recognised text.
 *
 * @param {number} pageNum 1-based page number to process.
 * @param {number} totalPages Total page count (accepted for call-site
 *     compatibility; not used here).
 * @returns {Promise<void>}
 * @throws {Error} If the page cannot be encoded as PNG or the OCR request
 *     returns a non-2xx status. The caller records this as the page's error.
 */
async function processPage(pageNum, totalPages) {
    const page = await pdfDocument.getPage(pageNum);

    // Off-screen canvas; never attached to the document.
    const viewport = page.getViewport({ scale: 1.5 });
    const canvas = document.createElement('canvas');
    const context = canvas.getContext('2d');
    canvas.height = viewport.height;
    canvas.width = viewport.width;

    let blob;
    try {
        await page.render({
            canvasContext: context,
            viewport: viewport
        }).promise;

        blob = await new Promise(resolve => {
            canvas.toBlob(resolve, 'image/png');
        });
    } finally {
        // Release the bitmap and the page's cached resources promptly, so
        // memory does not build up across a long document.
        canvas.width = 0;
        canvas.height = 0;
        page.cleanup();
    }

    // toBlob hands back null when the image cannot be created.
    if (!blob) {
        throw new Error('Could not encode the rendered page as an image.');
    }

    // A key is drawn from the pool for each request.
    const randomApiKey = getRandomApiKey();

    const formData = new FormData();
    formData.append('image', blob, `page-${pageNum}.png`);

    const response = await fetch(API_URL, {
        method: 'POST',
        headers: {
            'X-RapidAPI-Key': randomApiKey,
            'X-RapidAPI-Host': RAPIDAPI_HOST
        },
        body: formData
    });

    // Surface HTTP failures (quota, auth, server errors) as errors instead of
    // letting them be reported as "no text".
    if (!response.ok) {
        throw new Error(`OCR request failed (HTTP ${response.status})`);
    }

    const data = await response.json();

    // Walk the nested response defensively; anything other than a string at the
    // end of the path counts as "no text".
    const recognised = data?.results?.[0]?.entities?.[0]?.objects?.[0]?.entities?.[0]?.text;
    const extractedText = typeof recognised === 'string'
        ? recognised
        : 'No text could be extracted from this page.';

    // Store the text for export, then show it.
    allExtractedText.push(`[PAGE ${pageNum}]\n${extractedText}`);
    addPageResult(pageNum, extractedText, data);
}
450
+
451
+ // Add a page result to the UI
452
/**
 * Append a result card for one page to the results container and scroll to it.
 *
 * The card holds a heading, the text in a wrapping <pre>, a copy-to-clipboard
 * button and, when rawData is truthy, a collapsible dump of the raw response.
 *
 * @param {number} pageNum Page number shown in the card heading.
 * @param {string} text Text to display; an empty value is shown as
 *     'No text extracted' (and copied as an empty string).
 * @param {*} rawData Raw API response to show as JSON, or a falsy value to
 *     omit that section.
 */
function addPageResult(pageNum, text, rawData) {
    const COPY_LABEL = 'Copy Text';

    const pageResult = document.createElement('div');
    pageResult.className = 'page-result';

    // Page heading
    const header = document.createElement('h4');
    header.textContent = `Page ${pageNum}`;
    pageResult.appendChild(header);

    // Extracted text
    const textHeader = document.createElement('h5');
    textHeader.textContent = 'Extracted Text:';
    pageResult.appendChild(textHeader);

    const textContent = document.createElement('pre');
    textContent.style.whiteSpace = 'pre-wrap';
    textContent.textContent = text || 'No text extracted';
    pageResult.appendChild(textContent);

    // Copy button
    const copyButton = document.createElement('button');
    copyButton.type = 'button';
    copyButton.textContent = COPY_LABEL;
    copyButton.setAttribute('aria-label', 'Copy text to clipboard');
    copyButton.style.marginBottom = '10px';

    let resetLabelTimer = null;
    copyButton.addEventListener('click', () => {
        // The async Clipboard API is absent in some contexts (e.g. pages not
        // served securely); log instead of throwing from the handler.
        if (!navigator.clipboard) {
            console.error('Failed to copy text:', new Error('Clipboard API is not available'));
            return;
        }

        navigator.clipboard.writeText(text || '')
            .then(() => {
                copyButton.textContent = 'Copied!';
                // Restore a fixed label and keep only one pending timer, so
                // repeated clicks cannot leave the button stuck on "Copied!".
                clearTimeout(resetLabelTimer);
                resetLabelTimer = setTimeout(() => {
                    copyButton.textContent = COPY_LABEL;
                }, 2000);
            })
            .catch(err => console.error('Failed to copy text:', err));
    });
    pageResult.appendChild(copyButton);

    // Optional raw-response viewer
    if (rawData) {
        const rawToggle = document.createElement('details');
        const rawSummary = document.createElement('summary');
        rawSummary.textContent = 'Show Raw API Response';
        rawToggle.appendChild(rawSummary);

        const rawContent = document.createElement('pre');
        rawContent.style.maxHeight = '200px';
        rawContent.style.overflow = 'auto';
        rawContent.textContent = JSON.stringify(rawData, null, 2);
        rawToggle.appendChild(rawContent);

        pageResult.appendChild(rawToggle);
    }

    resultsContainer.appendChild(pageResult);

    // Bring the newest card into view
    pageResult.scrollIntoView({ behavior: 'smooth', block: 'end' });
}
511
+ });
512
+ </script>
513
+ </body>
514
+ </html>