File size: 3,762 Bytes
7850a69
 
 
 
 
 
 
b4bc5f7
7850a69
b4bc5f7
 
 
 
 
 
7850a69
 
b4bc5f7
 
7850a69
 
b4bc5f7
7850a69
 
b4bc5f7
 
 
 
 
 
 
 
 
7850a69
b4bc5f7
 
 
7850a69
b4bc5f7
 
 
 
 
 
 
7850a69
b4bc5f7
 
 
 
 
7850a69
b4bc5f7
 
 
 
 
 
7850a69
b4bc5f7
 
 
 
 
7850a69
b4bc5f7
 
 
 
 
 
7850a69
 
b4bc5f7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7850a69
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
import os
import tempfile
import gradio as gr
from gradio_pdf import PDF
from remittance_pdf_processor import process_pdf_with_flow
from remittance_pdf_processing_types import Candidate, InvoiceNumbers, PaymentAmount, ProcessedPDFResult
from remittance_pdf_processing_utils import remittance_logger, format_amount_str_to_decimal
from decimal import Decimal, InvalidOperation

def is_valid_decimal(s: str) -> bool:
	"""Return True when *s* parses as a finite decimal number.

	Args:
		s: Text to check, e.g. the raw value typed into an amount field.

	Returns:
		True if ``Decimal(s)`` succeeds and the result is finite; False for
		unparsable text, for the special values NaN / sNaN / Infinity, and
		for inputs ``Decimal`` cannot convert at all (such as ``None``).
	"""
	try:
		# Decimal happily parses "NaN" and "Infinity"; neither is a usable
		# amount, so require a finite value rather than just a clean parse.
		return Decimal(s).is_finite()
	except (InvalidOperation, TypeError, ValueError):
		# InvalidOperation: malformed numeric text.
		# TypeError / ValueError: input of a type Decimal cannot convert.
		return False

def custom_invoice_numbers_verifier(invoice_numbers: InvoiceNumbers, ground_truth_invoices: str) -> InvoiceNumbers:
	"""Keep only the extracted invoice numbers that appear in the ground truth.

	Matching is case-insensitive and ignores surrounding whitespace on both
	sides of the comparison.

	Args:
		invoice_numbers: Extracted invoice number strings to check.
		ground_truth_invoices: Comma-separated list of expected invoice numbers.

	Returns:
		A list with the matching entries of ``invoice_numbers``, in their
		original order and original spelling.
	"""
	# Drop empty tokens (from "a,,b" or a trailing comma) so that an empty
	# extracted string can never be reported as verified.
	ground_truth_set = {
		token.strip().lower()
		for token in ground_truth_invoices.split(',')
		if token.strip()
	}
	# Normalize candidates the same way the ground truth was normalized.
	return [num for num in invoice_numbers if num.strip().lower() in ground_truth_set]

def custom_invoice_and_amount_verifier(invoice_numbers: InvoiceNumbers, amount: PaymentAmount, ground_truth_invoices: str, ground_truth_amount: str) -> bool:
	"""Report whether the extracted amount equals the ground-truth amount.

	Both amounts are passed through ``format_amount_str_to_decimal`` before
	being compared. ``invoice_numbers`` and ``ground_truth_invoices`` are
	accepted but not consulted by this check.
	"""
	extracted_value = format_amount_str_to_decimal(amount)
	expected_value = format_amount_str_to_decimal(ground_truth_amount)
	return extracted_value == expected_value

def process_pdf_file(pdf_file_path: str, ground_truth_invoices: str, ground_truth_amount: str):
	"""Run the remittance extraction flow on a PDF and build a text report.

	Args:
		pdf_file_path: Path of the PDF to process; a missing/empty value is
			rejected with an error message.
		ground_truth_invoices: Comma-separated expected invoice numbers.
		ground_truth_amount: Expected payment amount as text.

	Returns:
		A human-readable string: either an "Error: ..." validation message,
		the report listing every candidate with a Verified/Unverified tag,
		or a message describing the exception raised during processing.
		This function does not raise for processing failures; they are
		logged through ``remittance_logger`` and returned as text.
	"""
	# Input validation
	# The upload component may hand over no path when nothing was selected;
	# report that directly instead of letting the processing flow fail on it.
	if not pdf_file_path:
		return "Error: Please upload a PDF file before processing."

	# `not value` also covers None, which would otherwise crash on .strip().
	if not ground_truth_invoices or not ground_truth_invoices.strip():
		return "Error: Ground Truth Invoices field cannot be empty."

	if not ground_truth_amount or not ground_truth_amount.strip():
		return "Error: Ground Truth Amount field cannot be empty."

	if not is_valid_decimal(ground_truth_amount.strip()):
		return "Error: Ground Truth Amount must be a valid decimal number."

	# Bind the ground truth into the callback shapes the flow expects.
	def invoice_verifier(candidate_invoice_numbers):
		return custom_invoice_numbers_verifier(candidate_invoice_numbers, ground_truth_invoices)

	def invoice_and_amount_verifier(candidate_invoice_numbers, candidate_amount):
		return custom_invoice_and_amount_verifier(
			candidate_invoice_numbers,
			candidate_amount,
			ground_truth_invoices,
			ground_truth_amount,
		)

	try:
		extracted_result: ProcessedPDFResult = process_pdf_with_flow(
			pdf_file_path,
			invoice_verifier=invoice_verifier,
			invoice_and_amount_verifier=invoice_and_amount_verifier
		)

		# Each candidate is indexed as [0] = invoice-number lists, [1] = amounts.
		verified_candidate, unverified_candidate = extracted_result

		# Build the verified sets once rather than once per listed candidate.
		verified_invoice_sets = [set(v) for v in verified_candidate[0]]

		report_lines = [
			"Extracted invoice numbers and payment amounts:",
			f"Ground Truth Invoices: {ground_truth_invoices}",
			f"Ground Truth Amount: {ground_truth_amount}",
			"-" * 40,
			"Invoice Numbers:",
		]

		# Invoice numbers: a candidate is verified when it holds the same set
		# of numbers as any verified candidate (order-insensitive).
		for i, invoice_numbers in enumerate(unverified_candidate[0], 1):
			status = "Verified" if set(invoice_numbers) in verified_invoice_sets else "Unverified"
			report_lines.append(f"  Candidate {i} ({status}): {', '.join(invoice_numbers)}")
		report_lines.append("")

		# Payment amounts
		report_lines.append("Payment Amounts:")
		for i, amount in enumerate(unverified_candidate[1], 1):
			status = "Verified" if amount in verified_candidate[1] else "Unverified"
			report_lines.append(f"  Candidate {i} ({status}): {amount}")

		# Every line, including the last, is newline-terminated.
		return "\n".join(report_lines) + "\n"
	except Exception as e:
		# UI boundary: log the traceback and show the failure as text.
		error_message = f"An error occurred while processing the PDF: {str(e)}"
		remittance_logger.error(error_message, exc_info=True)
		return error_message
		
# Create the Gradio interface
# Components are declared inside nested context managers; the order of the
# statements below determines where each component is placed on the page.
with gr.Blocks() as iface:
	# Page title and short usage instructions.
	gr.Markdown("# Remittance PDF Processor")
	gr.Markdown("Upload a PDF file to extract invoice numbers and payment amounts. Provide ground truth data for comparison.")
	
	# One row split into two columns with the same scale.
	with gr.Row():
		# First column: the PDF upload component.
		with gr.Column(scale=1):
			pdf_input = PDF(label="Upload PDF", visible=True)
		
		# Second column: ground-truth inputs, the trigger button and the results box.
		with gr.Column(scale=1):
			ground_truth_invoices = gr.Textbox(label="Ground Truth Invoice Numbers (comma-separated)")
			ground_truth_amount = gr.Textbox(label="Ground Truth Payment Amount")
			
			submit_button = gr.Button("Process PDF")
			
			output = gr.Textbox(label="Processing Results", lines=20)
	
	# Clicking the button calls process_pdf_file with the three inputs (in this
	# order) and writes the returned string into the results textbox.
	submit_button.click(
		process_pdf_file,
		inputs=[pdf_input, ground_truth_invoices, ground_truth_amount],
		outputs=[output]
	)

# Launch the interface
# NOTE: this runs whenever the module is executed or imported; there is no
# `if __name__ == "__main__":` guard.
iface.launch()