|
import os |
|
import tempfile |
|
import gradio as gr |
|
from gradio_pdf import PDF |
|
from remittance_pdf_processor import process_pdf_with_flow |
|
from remittance_pdf_processing_types import Candidate, InvoiceNumbers, PaymentAmount, ProcessedPDFResult |
|
from remittance_pdf_processing_utils import remittance_logger, format_amount_str_to_decimal |
|
from decimal import Decimal, InvalidOperation |
|
|
|
def is_valid_decimal(s: str) -> bool:
    """Return True if *s* can be parsed as a finite decimal number.

    Args:
        s: Text to check, e.g. ``"123.45"``.

    Returns:
        True when ``Decimal(s)`` succeeds and the value is finite; False for
        unparsable text, for non-string input such as ``None``, and for the
        special values ``NaN`` / ``Infinity`` (which ``Decimal`` accepts but
        which are not usable as an amount).
    """
    try:
        value = Decimal(s)
    except (InvalidOperation, TypeError, ValueError):
        # InvalidOperation: malformed text; TypeError/ValueError: wrong input type.
        return False
    # Decimal("NaN") and Decimal("Infinity") parse without error, so reject
    # them explicitly.
    return value.is_finite()
|
|
|
def custom_invoice_numbers_verifier(invoice_numbers: InvoiceNumbers, ground_truth_invoices: str) -> InvoiceNumbers:
    """Keep only the extracted invoice numbers that appear in the ground truth.

    Matching is case-insensitive and ignores surrounding whitespace on both
    sides. Empty ground-truth entries (e.g. from a trailing comma) are
    discarded so that an empty extracted value can never count as a match.

    Args:
        invoice_numbers: Extracted invoice numbers to check.
        ground_truth_invoices: Comma-separated list of expected invoice numbers.

    Returns:
        A list with the matching entries of ``invoice_numbers``, in their
        original order and with their original text unchanged.
    """
    ground_truth_set = {
        token.strip().lower()
        for token in ground_truth_invoices.split(',')
        if token.strip()  # skip blanks produced by ",," or a trailing comma
    }
    return [num for num in invoice_numbers if num.strip().lower() in ground_truth_set]
|
|
|
def custom_invoice_and_amount_verifier(invoice_numbers: InvoiceNumbers, amount: PaymentAmount, ground_truth_invoices: str, ground_truth_amount: str) -> bool:
    """Report whether the extracted amount equals the ground-truth amount.

    Both amounts are passed through ``format_amount_str_to_decimal`` before
    being compared. ``invoice_numbers`` and ``ground_truth_invoices`` are
    accepted but not consulted by this check.

    Args:
        invoice_numbers: Extracted invoice numbers (unused here).
        amount: Extracted payment amount.
        ground_truth_invoices: Expected invoice numbers (unused here).
        ground_truth_amount: Expected payment amount as text.

    Returns:
        The result of comparing the two normalised amounts for equality.
    """
    extracted_value = format_amount_str_to_decimal(amount)
    expected_value = format_amount_str_to_decimal(ground_truth_amount)
    return extracted_value == expected_value
|
|
|
def _format_extraction_report(verified_candidate, unverified_candidate, ground_truth_invoices: str, ground_truth_amount: str) -> str:
    """Render extraction candidates and their verification status as text."""
    # Build the lookup once instead of rebuilding a set per comparison.
    verified_invoice_sets = {frozenset(v) for v in verified_candidate[0]}
    verified_amounts = verified_candidate[1]

    lines = [
        "Extracted invoice numbers and payment amounts:",
        f"Ground Truth Invoices: {ground_truth_invoices}",
        f"Ground Truth Amount: {ground_truth_amount}",
        "-" * 40,
        "Invoice Numbers:",
    ]
    for i, invoice_numbers in enumerate(unverified_candidate[0], 1):
        # A candidate counts as verified when it holds exactly the same
        # invoice numbers as some verified candidate, regardless of order.
        status = "Verified" if frozenset(invoice_numbers) in verified_invoice_sets else "Unverified"
        lines.append(f" Candidate {i} ({status}): {', '.join(invoice_numbers)}")

    lines.append("")  # blank line between the two sections
    lines.append("Payment Amounts:")
    for i, amount in enumerate(unverified_candidate[1], 1):
        status = "Verified" if amount in verified_amounts else "Unverified"
        lines.append(f" Candidate {i} ({status}): {amount}")

    return "\n".join(lines) + "\n"


def process_pdf_file(pdf_file_path: str, ground_truth_invoices: str, ground_truth_amount: str) -> str:
    """Run the remittance extraction flow on a PDF and return a text report.

    The inputs are validated first; any validation failure is returned as an
    ``"Error: ..."`` string rather than raised, so the message can be shown
    directly in the results box.

    Args:
        pdf_file_path: Value supplied by the PDF input component; forwarded
            unchanged to ``process_pdf_with_flow``. Treated as missing when
            empty or ``None``.
        ground_truth_invoices: Comma-separated expected invoice numbers.
        ground_truth_amount: Expected payment amount; must parse as a decimal.

    Returns:
        A report listing every extracted candidate with a
        ``Verified`` / ``Unverified`` status, or an error message string if
        validation or processing fails.
    """
    # Guard against a missing upload up front; otherwise the failure would
    # only surface as a generic processing error.
    if not pdf_file_path:
        return "Error: Please upload a PDF file."

    if not ground_truth_invoices or not ground_truth_invoices.strip():
        return "Error: Ground Truth Invoices field cannot be empty."

    if not ground_truth_amount or not ground_truth_amount.strip():
        return "Error: Ground Truth Amount field cannot be empty."

    if not is_valid_decimal(ground_truth_amount.strip()):
        return "Error: Ground Truth Amount must be a valid decimal number."

    def invoice_verifier(invoice_numbers):
        # Bind the user's ground truth into the single-argument callback.
        return custom_invoice_numbers_verifier(invoice_numbers, ground_truth_invoices)

    def invoice_and_amount_verifier(invoice_numbers, amount):
        return custom_invoice_and_amount_verifier(
            invoice_numbers, amount, ground_truth_invoices, ground_truth_amount
        )

    try:
        verified_candidate, unverified_candidate = process_pdf_with_flow(
            pdf_file_path,
            invoice_verifier=invoice_verifier,
            invoice_and_amount_verifier=invoice_and_amount_verifier,
        )
        return _format_extraction_report(
            verified_candidate,
            unverified_candidate,
            ground_truth_invoices,
            ground_truth_amount,
        )
    except Exception as e:
        # Top-level UI boundary: log the traceback and show the message to the
        # user instead of letting the request fail.
        error_message = f"An error occurred while processing the PDF: {str(e)}"
        remittance_logger.error(error_message, exc_info=True)
        return error_message
|
|
|
|
|
# Gradio UI: a PDF input beside two ground-truth text fields, a button that
# runs process_pdf_file, and a text box that shows the returned report.
with gr.Blocks() as iface:
    gr.Markdown("# Remittance PDF Processor")
    gr.Markdown(
        "Upload a PDF file to extract invoice numbers and payment amounts. "
        "Provide ground truth data for comparison."
    )

    with gr.Row():
        # Left column: the document to process.
        with gr.Column(scale=1):
            pdf_input = PDF(label="Upload PDF", visible=True)

        # Right column: expected values the extraction is checked against.
        with gr.Column(scale=1):
            ground_truth_invoices = gr.Textbox(
                label="Ground Truth Invoice Numbers (comma-separated)"
            )
            ground_truth_amount = gr.Textbox(label="Ground Truth Payment Amount")

    submit_button = gr.Button("Process PDF")
    output = gr.Textbox(label="Processing Results", lines=20)

    # The three inputs are passed positionally to process_pdf_file, and its
    # return value is written to the results box.
    submit_button.click(
        fn=process_pdf_file,
        inputs=[pdf_input, ground_truth_invoices, ground_truth_amount],
        outputs=[output],
    )

# NOTE: runs at module level, so the app is launched whenever this file is
# executed or imported.
iface.launch()