push up
Browse files- extractor_compare.py +89 -85
extractor_compare.py
CHANGED
|
@@ -226,10 +226,11 @@ def create_interface():
|
|
| 226 |
font-family: 'Local Arial', sans-serif;
|
| 227 |
}
|
| 228 |
</style>
|
| 229 |
-
<
|
| 230 |
-
|
|
|
|
| 231 |
<div id="pdf-fallback" style="position:absolute; top:0; left:0; width:100%; height:100%;
|
| 232 |
-
display:flex; align-items:center; justify-content:center; padding:20px; text-align:center;">
|
| 233 |
Click "Load PDFs" to start viewing documents.
|
| 234 |
</div>
|
| 235 |
</div>
|
|
@@ -350,110 +351,113 @@ def create_interface():
|
|
| 350 |
demo.load(
|
| 351 |
fn=None,
|
| 352 |
js="""
|
| 353 |
-
|
| 354 |
-
|
| 355 |
-
console.log('Setting up PDF data observer...');
|
| 356 |
|
| 357 |
-
//
|
| 358 |
-
|
|
|
|
|
|
|
|
|
|
| 359 |
try {
|
| 360 |
-
|
| 361 |
-
|
| 362 |
-
|
|
|
|
| 363 |
return;
|
| 364 |
}
|
| 365 |
|
| 366 |
-
|
| 367 |
-
const hiddenTextArea = targetNode.querySelector('textarea');
|
| 368 |
-
if (!hiddenTextArea) {
|
| 369 |
-
console.error('Hidden textarea not found within the container!');
|
| 370 |
-
return;
|
| 371 |
-
}
|
| 372 |
|
| 373 |
-
|
|
|
|
|
|
|
|
|
|
| 374 |
|
| 375 |
-
//
|
| 376 |
-
const
|
| 377 |
-
|
| 378 |
-
|
| 379 |
-
|
| 380 |
-
|
| 381 |
-
};
|
| 382 |
|
| 383 |
-
// Create and
|
| 384 |
-
const
|
| 385 |
-
|
| 386 |
-
if (hiddenTextArea.value && hiddenTextArea.value.length > 100) {
|
| 387 |
-
console.log('Valid value found in textarea, displaying PDF');
|
| 388 |
-
displayPdfBlob(hiddenTextArea.value);
|
| 389 |
-
}
|
| 390 |
-
});
|
| 391 |
|
| 392 |
-
//
|
| 393 |
-
|
| 394 |
-
|
| 395 |
|
| 396 |
-
|
| 397 |
-
|
| 398 |
-
|
| 399 |
-
|
|
|
|
|
|
|
|
|
|
| 400 |
}
|
| 401 |
} catch (error) {
|
| 402 |
-
console.error(
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 403 |
}
|
| 404 |
-
}
|
| 405 |
-
|
| 406 |
-
|
| 407 |
-
|
| 408 |
-
|
| 409 |
-
|
| 410 |
-
|
| 411 |
-
|
| 412 |
-
const fallback = document.getElementById('pdf-fallback');
|
| 413 |
-
|
| 414 |
-
if (!iframe || !fallback) {
|
| 415 |
-
console.error('PDF viewer elements not found');
|
| 416 |
return;
|
| 417 |
}
|
| 418 |
|
| 419 |
-
|
| 420 |
-
|
| 421 |
-
|
| 422 |
-
|
| 423 |
-
|
| 424 |
-
for (let i = 0; i < len; i++) {
|
| 425 |
-
bytes[i] = binaryString.charCodeAt(i);
|
| 426 |
}
|
| 427 |
|
| 428 |
-
//
|
| 429 |
-
|
| 430 |
-
|
|
|
|
| 431 |
|
| 432 |
-
//
|
| 433 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 434 |
|
| 435 |
-
|
| 436 |
-
|
|
|
|
| 437 |
|
| 438 |
-
|
| 439 |
-
|
| 440 |
-
|
| 441 |
-
|
| 442 |
-
|
|
|
|
|
|
|
|
|
|
| 443 |
}
|
| 444 |
-
|
| 445 |
-
// Initialize the observer after everything is loaded
|
| 446 |
-
window.addEventListener('load', function() {
|
| 447 |
-
console.log('Window loaded, initializing PDF observer...');
|
| 448 |
-
setupPdfDataObserver();
|
| 449 |
-
});
|
| 450 |
-
|
| 451 |
-
// Also setup when Gradio mounts the component
|
| 452 |
-
document.addEventListener('DOMContentLoaded', function() {
|
| 453 |
-
console.log('DOM loaded, waiting for Gradio components...');
|
| 454 |
-
// Wait a bit longer for Gradio components to mount
|
| 455 |
-
setTimeout(setupPdfDataObserver, 2000);
|
| 456 |
-
});
|
| 457 |
"""
|
| 458 |
)
|
| 459 |
|
|
|
|
| 226 |
font-family: 'Local Arial', sans-serif;
|
| 227 |
}
|
| 228 |
</style>
|
| 229 |
+
<object id="pdf-object" type="application/pdf" width="100%" height="100%" style="display:none;">
|
| 230 |
+
<p>PDF cannot be displayed</p>
|
| 231 |
+
</object>
|
| 232 |
<div id="pdf-fallback" style="position:absolute; top:0; left:0; width:100%; height:100%;
|
| 233 |
+
display:flex; align-items:center; justify-content:center; padding:20px; text-align:center; font-family: Arial, sans-serif;">
|
| 234 |
Click "Load PDFs" to start viewing documents.
|
| 235 |
</div>
|
| 236 |
</div>
|
|
|
|
| 351 |
demo.load(
|
| 352 |
fn=None,
|
| 353 |
js="""
|
| 354 |
+
function() {
|
| 355 |
+
console.log("Setting up PDF viewer");
|
|
|
|
| 356 |
|
| 357 |
+
// Object URL of the blob currently assigned to the viewer. It is kept so the
// URL can be released once a newer document has replaced it.
let pdfBlobUrl = null;

/**
 * Show a base64-encoded PDF in the `#pdf-object` element.
 *
 * Payloads that are empty or shorter than 100 characters are treated as
 * "no document": the viewer is hidden and the `#pdf-fallback` message is
 * shown instead. If decoding fails, an error message is written into the
 * fallback element and the previously displayed document (if any) is left
 * untouched.
 *
 * @param {string} base64Data - Base64 text of the PDF (no `data:` prefix).
 * @returns {void}
 */
function displayPdfFromBase64(base64Data) {
    const pdfObject = document.getElementById('pdf-object');
    const fallback = document.getElementById('pdf-fallback');

    try {
        // Check the elements before touching them so a missing node is
        // reported as such instead of surfacing as a TypeError.
        if (!pdfObject || !fallback) {
            console.error("PDF viewer elements not found");
            return;
        }

        if (!base64Data || base64Data.length < 100) {
            console.log("No valid PDF data received");
            fallback.style.display = 'flex';
            pdfObject.style.display = 'none';
            return;
        }

        console.log("Displaying PDF from base64 data");

        // Decode first: atob throws on malformed input, and nothing that is
        // currently on screen may be invalidated before that is known.
        const binary = atob(base64Data);
        const bytes = new Uint8Array(binary.length);
        for (let i = 0; i < binary.length; i++) {
            bytes[i] = binary.charCodeAt(i);
        }

        const blob = new Blob([bytes], {type: 'application/pdf'});
        const nextUrl = URL.createObjectURL(blob);
        const previousUrl = pdfBlobUrl;

        pdfBlobUrl = nextUrl;
        pdfObject.data = nextUrl;
        pdfObject.style.display = 'block';
        fallback.style.display = 'none';

        // Release the old blob only after the viewer points at the new one.
        if (previousUrl) {
            URL.revokeObjectURL(previousUrl);
        }
        console.log("PDF displayed successfully");
    } catch (error) {
        console.error("Error displaying PDF:", error);
        if (fallback) {
            fallback.innerHTML = '<div style="color:red; font-family: Arial, sans-serif;">Error displaying PDF</div>';
            fallback.style.display = 'flex';
        }
    }
}
|
| 409 |
+
|
| 410 |
+
// Check for PDF data repeatedly
|
| 411 |
+
/**
 * Wait for the `#pdf_base64_data` container and its textarea to exist, then
 * render the textarea's payload and keep polling it for changes.
 *
 * While either element is missing the function reschedules itself after one
 * second. Once both exist it renders the current value (if it is longer than
 * 100 characters) and starts a 2-second poll. The poll calls
 * `displayPdfFromBase64` only when the value differs from the one last
 * rendered, so an unchanged document is not rebuilt on every tick.
 *
 * @returns {void}
 */
function checkForPdfData() {
    const dataElement = document.getElementById('pdf_base64_data');
    if (!dataElement) {
        console.log("PDF data element not found, will retry");
        setTimeout(checkForPdfData, 1000);
        return;
    }

    if (!dataElement.querySelector('textarea')) {
        console.log("Textarea not found, will retry");
        setTimeout(checkForPdfData, 1000);
        return;
    }

    // Payload most recently handed to displayPdfFromBase64.
    let lastRendered = null;

    const renderIfChanged = function() {
        // Look the textarea up again on each tick rather than holding one
        // node; presumably the UI framework can replace it on re-render.
        // TODO confirm
        const container = document.getElementById('pdf_base64_data');
        const textarea = container ? container.querySelector('textarea') : null;
        const value = textarea ? textarea.value : '';

        if (!value || value.length <= 100 || value === lastRendered) {
            return;
        }
        lastRendered = value;
        displayPdfFromBase64(value);
    };

    // Display initial data if available, then poll for changes.
    renderIfChanged();
    setInterval(renderIfChanged, 2000);
}
|
| 438 |
+
|
| 439 |
+
// Start checking for PDF data
|
| 440 |
+
setTimeout(checkForPdfData, 1000);
|
| 441 |
+
|
| 442 |
+
// Add keyboard shortcuts
|
| 443 |
+
// Plain Left/Right arrow presses click the `prev_button` / `next_button`
// elements, unless the user is typing in a form control.
document.addEventListener('keydown', function(event) {
    // Leave modified arrows to the browser and OS: Alt+Left/Right is history
    // navigation in several browsers, and Shift/Ctrl/Meta+arrows are used for
    // selection and caret movement.
    if (event.altKey || event.ctrlKey || event.metaKey || event.shiftKey) {
        return;
    }

    // Arrow keys have their own meaning inside editable or selectable controls.
    const target = event.target;
    const tagName = target ? target.tagName : null;
    if (tagName === 'INPUT' || tagName === 'TEXTAREA' || tagName === 'SELECT') {
        return;
    }
    if (target && target.isContentEditable) {
        return;
    }

    let buttonId = null;
    if (event.key === 'ArrowLeft') {
        buttonId = 'prev_button';
    } else if (event.key === 'ArrowRight') {
        buttonId = 'next_button';
    }
    if (!buttonId) {
        return;
    }

    const button = document.getElementById(buttonId);
    if (button) {
        // Only suppress the default action when the key is actually handled.
        event.preventDefault();
        button.click();
    }
});
|
| 460 |
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 461 |
"""
|
| 462 |
)
|
| 463 |
|