Added validation for the verification fields
Browse files
app.py
CHANGED
@@ -5,75 +5,92 @@ from gradio_pdf import PDF
|
|
5 |
from remittance_pdf_processor import process_pdf_with_flow
|
6 |
from remittance_pdf_processing_types import Candidate, InvoiceNumbers, PaymentAmount, ProcessedPDFResult
|
7 |
from remittance_pdf_processing_utils import remittance_logger, format_amount_str_to_decimal
|
|
|
8 |
|
|
|
|
|
|
|
|
|
|
|
|
|
9 |
|
10 |
def custom_invoice_numbers_verifier(invoice_numbers: InvoiceNumbers, ground_truth_invoices: str) -> InvoiceNumbers:
|
11 |
-
|
12 |
-
|
13 |
|
14 |
def custom_invoice_and_amount_verifier(invoice_numbers: InvoiceNumbers, amount: PaymentAmount, ground_truth_invoices: str, ground_truth_amount: str) -> bool:
|
15 |
-
|
16 |
|
17 |
def process_pdf_file(pdf_file_path: str, ground_truth_invoices: str, ground_truth_amount: str):
|
18 |
-
|
19 |
-
|
20 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
21 |
|
22 |
-
|
23 |
-
|
24 |
-
|
25 |
-
invoice_and_amount_verifier=invoice_and_amount_verifier
|
26 |
-
)
|
27 |
-
|
28 |
-
verified_candidate, unverified_candidate = extracted_result
|
29 |
|
30 |
-
|
31 |
-
|
32 |
-
|
33 |
-
|
34 |
-
|
|
|
|
|
35 |
|
36 |
-
|
37 |
-
|
38 |
-
|
39 |
-
|
40 |
-
|
41 |
-
result += "\n"
|
42 |
|
43 |
-
|
44 |
-
|
45 |
-
|
46 |
-
|
47 |
-
|
|
|
48 |
|
49 |
-
|
50 |
-
|
51 |
-
|
52 |
-
|
53 |
-
|
54 |
|
|
|
|
|
|
|
|
|
|
|
|
|
55 |
# Create the Gradio interface
|
56 |
with gr.Blocks() as iface:
|
57 |
-
|
58 |
-
|
59 |
-
|
60 |
-
|
61 |
-
|
62 |
-
|
63 |
-
|
64 |
-
|
65 |
-
|
66 |
-
|
67 |
-
|
68 |
-
|
69 |
-
|
70 |
-
|
71 |
-
|
72 |
-
|
73 |
-
|
74 |
-
|
75 |
-
|
76 |
-
|
77 |
|
78 |
# Launch the interface
|
79 |
iface.launch()
|
|
|
5 |
from remittance_pdf_processor import process_pdf_with_flow
|
6 |
from remittance_pdf_processing_types import Candidate, InvoiceNumbers, PaymentAmount, ProcessedPDFResult
|
7 |
from remittance_pdf_processing_utils import remittance_logger, format_amount_str_to_decimal
|
8 |
+
from decimal import Decimal, InvalidOperation
|
9 |
|
10 |
+
def is_valid_decimal(s: str) -> bool:
    """Return True if *s* can be parsed as a finite decimal number.

    Args:
        s: Text to check, e.g. the raw value typed into an amount field.

    Returns:
        True when ``Decimal(s)`` succeeds and the value is finite;
        False for unparsable text, non-finite values ("NaN", "Infinity")
        or input of an unsupported type such as ``None``.
    """
    try:
        # Decimal() happily parses "NaN" and "Infinity"; neither is a usable
        # amount, so require a finite value instead of mere parseability.
        return Decimal(s).is_finite()
    except (InvalidOperation, TypeError, ValueError):
        # InvalidOperation: malformed text. TypeError/ValueError: input that
        # is not a supported type or shape (e.g. None).
        return False
|
16 |
|
17 |
def custom_invoice_numbers_verifier(invoice_numbers: InvoiceNumbers, ground_truth_invoices: str) -> InvoiceNumbers:
    """Keep only the extracted invoice numbers that appear in the ground truth.

    Matching ignores case and surrounding whitespace on both sides.

    Args:
        invoice_numbers: Extracted invoice number strings to check.
        ground_truth_invoices: Comma-separated list of expected invoice numbers.

    Returns:
        The entries of ``invoice_numbers`` (unmodified, in their original
        order) whose normalised form is present in the ground truth.
    """
    ground_truth_set = {token.strip().lower() for token in ground_truth_invoices.split(",")}
    # A trailing or doubled comma yields an empty token; without this an empty
    # extracted invoice number would be reported as verified.
    ground_truth_set.discard("")
    return [num for num in invoice_numbers if num.strip().lower() in ground_truth_set]
|
20 |
|
21 |
def custom_invoice_and_amount_verifier(invoice_numbers: InvoiceNumbers, amount: PaymentAmount, ground_truth_invoices: str, ground_truth_amount: str) -> bool:
    """Check whether an extracted payment amount equals the ground-truth amount.

    Both amounts are normalised with ``format_amount_str_to_decimal`` before
    being compared. ``invoice_numbers`` and ``ground_truth_invoices`` are
    accepted but not consulted by this check.

    Returns:
        The result of comparing the two normalised amounts for equality.
    """
    extracted_value = format_amount_str_to_decimal(amount)
    expected_value = format_amount_str_to_decimal(ground_truth_amount)
    return extracted_value == expected_value
|
23 |
|
24 |
def _format_extraction_report(verified_candidate, unverified_candidate, ground_truth_invoices, ground_truth_amount) -> str:
    """Render the candidate invoice numbers and amounts as a text report."""
    # Each candidate is indexed as [0] = invoice-number groups, [1] = amounts.
    verified_invoice_sets = [set(group) for group in verified_candidate[0]]
    verified_amounts = verified_candidate[1]

    lines = [
        "Extracted invoice numbers and payment amounts:",
        f"Ground Truth Invoices: {ground_truth_invoices}",
        f"Ground Truth Amount: {ground_truth_amount}",
        "-" * 40,
        "Invoice Numbers:",
    ]
    for i, invoice_numbers in enumerate(unverified_candidate[0], 1):
        # Order-insensitive comparison: a group is verified when it holds the
        # same set of numbers as any verified group.
        status = "Verified" if set(invoice_numbers) in verified_invoice_sets else "Unverified"
        lines.append(f" Candidate {i} ({status}): {', '.join(invoice_numbers)}")

    # Blank line between the two sections.
    # NOTE(review): assumed to follow the loop rather than each candidate - confirm.
    lines.append("")
    lines.append("Payment Amounts:")
    for i, amount in enumerate(unverified_candidate[1], 1):
        status = "Verified" if amount in verified_amounts else "Unverified"
        lines.append(f" Candidate {i} ({status}): {amount}")

    return "\n".join(lines) + "\n"


def process_pdf_file(pdf_file_path: str, ground_truth_invoices: str, ground_truth_amount: str):
    """Validate the form inputs, process the PDF and return a text report.

    Args:
        pdf_file_path: Path of the uploaded PDF; falsy when nothing was uploaded.
        ground_truth_invoices: Comma-separated expected invoice numbers.
        ground_truth_amount: Expected payment amount as text.

    Returns:
        A report listing every candidate invoice-number group and payment
        amount with a Verified/Unverified status, or a string starting with
        "Error:" / "An error occurred" when validation or processing fails.
        Exceptions raised during processing are logged and reported in the
        returned string rather than propagated.
    """
    # Input validation. Fields are treated as empty when None so a missing
    # value produces a readable message instead of an AttributeError.
    if not pdf_file_path:
        return "Error: Please upload a PDF file."

    if not (ground_truth_invoices or "").strip():
        return "Error: Ground Truth Invoices field cannot be empty."

    amount_text = (ground_truth_amount or "").strip()
    if not amount_text:
        return "Error: Ground Truth Amount field cannot be empty."

    if not is_valid_decimal(amount_text):
        return "Error: Ground Truth Amount must be a valid decimal number."

    # Bind the ground-truth values so the processor can call the verifiers
    # with only the extracted data.
    def invoice_verifier(x):
        return custom_invoice_numbers_verifier(x, ground_truth_invoices)

    def invoice_and_amount_verifier(x, y):
        return custom_invoice_and_amount_verifier(x, y, ground_truth_invoices, ground_truth_amount)

    try:
        extracted_result = process_pdf_with_flow(
            pdf_file_path,
            invoice_verifier=invoice_verifier,
            invoice_and_amount_verifier=invoice_and_amount_verifier
        )

        verified_candidate, unverified_candidate = extracted_result

        return _format_extraction_report(
            verified_candidate,
            unverified_candidate,
            ground_truth_invoices,
            ground_truth_amount,
        )
    except Exception as e:
        # UI boundary: report the failure to the user and keep the traceback
        # in the log instead of crashing the handler.
        error_message = f"An error occurred while processing the PDF: {str(e)}"
        remittance_logger.error(error_message, exc_info=True)
        return error_message
|
71 |
+
|
72 |
# Create the Gradio interface
# Components are declared inside the Blocks context in the order below.
with gr.Blocks() as iface:
    # Page title and short usage instructions.
    gr.Markdown("# Remittance PDF Processor")
    gr.Markdown("Upload a PDF file to extract invoice numbers and payment amounts. Provide ground truth data for comparison.")

    # One row holding two columns of equal scale: the PDF upload, then the
    # ground-truth text fields.
    with gr.Row():
        with gr.Column(scale=1):
            pdf_input = PDF(label="Upload PDF", visible=True)

        with gr.Column(scale=1):
            ground_truth_invoices = gr.Textbox(label="Ground Truth Invoice Numbers (comma-separated)")
            ground_truth_amount = gr.Textbox(label="Ground Truth Payment Amount")

    submit_button = gr.Button("Process PDF")

    # Text area that receives the string returned by process_pdf_file.
    output = gr.Textbox(label="Processing Results", lines=20)

    # On click, pass the three input components to process_pdf_file (in its
    # parameter order) and write the returned text into `output`.
    submit_button.click(
        process_pdf_file,
        inputs=[pdf_input, ground_truth_invoices, ground_truth_amount],
        outputs=[output]
    )

# Launch the interface
# Runs at module import time; there is no __main__ guard.
iface.launch()
|