File size: 3,780 Bytes
7850a69
 
 
 
 
 
 
b4bc5f7
7850a69
b4bc5f7
 
 
 
 
 
7850a69
 
b4bc5f7
 
7850a69
 
b4bc5f7
7850a69
 
b4bc5f7
 
 
 
 
 
 
 
 
7850a69
b4bc5f7
 
 
7850a69
b4bc5f7
 
 
 
 
 
 
7850a69
b4bc5f7
aa68578
b4bc5f7
 
 
7850a69
b4bc5f7
aa68578
b4bc5f7
 
 
 
7850a69
b4bc5f7
aa68578
b4bc5f7
 
 
7850a69
b4bc5f7
 
 
 
 
aa68578
7850a69
 
b4bc5f7
 
 
 
21cc687
 
b4bc5f7
21cc687
b4bc5f7
 
 
 
 
 
 
 
 
 
 
 
7850a69
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
import os
import tempfile
import gradio as gr
from gradio_pdf import PDF
from remittance_pdf_processor import process_pdf_with_flow
from remittance_pdf_processing_types import Candidate, InvoiceNumbers, PaymentAmount, ProcessedPDFResult
from remittance_pdf_processing_utils import remittance_logger, format_amount_str_to_decimal
from decimal import Decimal, InvalidOperation

def is_valid_decimal(s: str) -> bool:
	"""Return True when *s* can be parsed as a finite decimal number.

	The value is handed to ``decimal.Decimal``; anything the constructor
	rejects yields False. Special values that ``Decimal`` accepts but that
	are not usable as a monetary amount ("NaN", "Infinity", "-Infinity",
	signalling NaNs) are rejected as well.

	Args:
		s: Text to validate.

	Returns:
		True if *s* parses to a finite ``Decimal``, otherwise False.
	"""
	try:
		# is_finite() is False for NaN and +/-Infinity, which Decimal() itself accepts.
		return Decimal(s).is_finite()
	except (InvalidOperation, TypeError, ValueError):
		# InvalidOperation: malformed text; TypeError/ValueError: unsupported input such as None.
		return False

def custom_invoice_numbers_verifier(invoice_numbers: InvoiceNumbers, ground_truth_invoices: str) -> InvoiceNumbers:
	"""Keep only the extracted invoice numbers that appear in the ground truth.

	Matching ignores letter case and surrounding whitespace on both sides.
	Empty entries in the ground truth (e.g. from a trailing or doubled comma)
	are discarded so that an empty extracted value can never count as a match.

	Args:
		invoice_numbers: Extracted invoice number strings to check.
		ground_truth_invoices: Comma-separated list of expected invoice numbers.

	Returns:
		A list of the elements of *invoice_numbers* (unmodified, original
		order) whose normalised form is present in the ground truth.
	"""
	ground_truth_set = {
		entry.strip().lower()
		for entry in ground_truth_invoices.split(',')
		if entry.strip()
	}
	# Compare on the normalised form but return the caller's original strings.
	return [num for num in invoice_numbers if num.strip().lower() in ground_truth_set]

def custom_invoice_and_amount_verifier(invoice_numbers: InvoiceNumbers, amount: PaymentAmount, ground_truth_invoices: str, ground_truth_amount: str) -> bool:
	"""Report whether the extracted amount equals the ground truth amount.

	Both amounts are passed through ``format_amount_str_to_decimal`` and the
	results are compared for equality. ``invoice_numbers`` and
	``ground_truth_invoices`` are part of the signature but are not consulted
	by this check.
	"""
	extracted_value = format_amount_str_to_decimal(amount)
	expected_value = format_amount_str_to_decimal(ground_truth_amount)
	return extracted_value == expected_value

def _format_processing_report(ground_truth_invoices: str, ground_truth_amount: str, verified_candidate, unverified_candidate) -> str:
	"""Render the ground truth and every extracted candidate as a plain-text report.

	Index 0 of each candidate holds the invoice-number lists and index 1 the
	payment amounts. An entry from ``unverified_candidate`` is labelled
	"Verified" when it also appears in ``verified_candidate`` (invoice lists
	are compared as sets, amounts by membership).
	"""
	lines = [
		f"Ground Truth Invoices: {ground_truth_invoices}",
		f"Ground Truth Amount: {ground_truth_amount}",
		"-" * 40,
		"Extracted Invoice Numbers:",
	]

	# Build the comparison sets once instead of once per candidate.
	verified_invoice_sets = [set(v) for v in verified_candidate[0]]
	for i, invoice_numbers in enumerate(unverified_candidate[0], 1):
		status = "Verified" if set(invoice_numbers) in verified_invoice_sets else "Unverified"
		lines.append(f"  Candidate {i} ({status}): {', '.join(invoice_numbers)}")

	# Blank separator line between the two sections.
	lines.append("")
	lines.append("Extracted Payment Amounts:")
	for i, amount in enumerate(unverified_candidate[1], 1):
		status = "Verified" if amount in verified_candidate[1] else "Unverified"
		lines.append(f"  Candidate {i} ({status}): {amount}")

	return "\n".join(lines) + "\n"


def process_pdf_file(pdf_file_path: str, ground_truth_invoices: str, ground_truth_amount: str) -> str:
	"""Run the remittance extraction on a PDF and report results against ground truth.

	Args:
		pdf_file_path: Path of the PDF to process; a missing value is reported
			as an error instead of being passed to the extraction pipeline.
		ground_truth_invoices: Comma-separated expected invoice numbers.
		ground_truth_amount: Expected payment amount as text.

	Returns:
		A human-readable report of all extracted candidates with their
		verification status, or a message starting with "Error:" /
		"An error occurred" when validation or processing fails. Exceptions
		raised during processing are logged and converted into a message.
	"""
	# Input validation
	if not pdf_file_path:
		return "Error: Please upload a PDF file."

	# Treat a missing text value like an empty one so validation never raises.
	ground_truth_invoices = ground_truth_invoices or ""
	ground_truth_amount = ground_truth_amount or ""

	if not ground_truth_invoices.strip():
		return "Error: Ground Truth Invoices field cannot be empty."

	if not ground_truth_amount.strip():
		return "Error: Ground Truth Amount field cannot be empty."

	if not is_valid_decimal(ground_truth_amount.strip()):
		return "Error: Ground Truth Amount must be a valid decimal number."

	def invoice_verifier(candidate_invoices):
		return custom_invoice_numbers_verifier(candidate_invoices, ground_truth_invoices)

	def invoice_and_amount_verifier(candidate_invoices, candidate_amount):
		return custom_invoice_and_amount_verifier(
			candidate_invoices, candidate_amount, ground_truth_invoices, ground_truth_amount
		)

	try:
		extracted_result: ProcessedPDFResult = process_pdf_with_flow(
			pdf_file_path,
			invoice_verifier=invoice_verifier,
			invoice_and_amount_verifier=invoice_and_amount_verifier
		)

		verified_candidate, unverified_candidate = extracted_result

		return _format_processing_report(
			ground_truth_invoices,
			ground_truth_amount,
			verified_candidate,
			unverified_candidate,
		)
	except Exception as e:
		# UI boundary: log the full traceback and show the user a readable message.
		error_message = f"An error occurred while processing the PDF: {str(e)}"
		remittance_logger.error(error_message, exc_info=True)
		return error_message

# Create the Gradio interface.
# Components are rendered in the order they are created inside the nested
# Blocks/Row/Column contexts, so statement order here defines the page layout.
with gr.Blocks() as iface:
	gr.Markdown("# Remittance PDF Processor")
	gr.Markdown("Upload a PDF file to extract invoice numbers and payment amounts. Provide ground truth data for comparison.")
	
	with gr.Row():
		# Left column: the PDF upload/preview component.
		with gr.Column():
			pdf_input = PDF(label="Upload PDF", visible=True, height=900)
		
		# Right column: ground truth inputs, the submit button and the result box.
		with gr.Column():
			ground_truth_invoices = gr.Textbox(label="Ground Truth Invoice Numbers (comma-separated)")
			ground_truth_amount = gr.Textbox(label="Ground Truth Payment Amount")
			
			submit_button = gr.Button("Process PDF")
			
			output = gr.Textbox(label="Processing Results", lines=20)
	
	# Clicking the button calls process_pdf_file with the three input values
	# (in this order) and writes its returned string into the results textbox.
	submit_button.click(
		process_pdf_file,
		inputs=[pdf_input, ground_truth_invoices, ground_truth_amount],
		outputs=[output]
	)

# Launch the interface
# NOTE(review): this runs at module top level, so the app also starts when the
# module is imported, not only when it is executed as a script.
iface.launch()