eelang committed on
Commit
b4bc5f7
·
verified ·
1 Parent(s): 6b7c7ef

Added validation for the verification fields

Browse files
Files changed (1) hide show
  1. app.py +71 -54
app.py CHANGED
@@ -5,75 +5,92 @@ from gradio_pdf import PDF
5
  from remittance_pdf_processor import process_pdf_with_flow
6
  from remittance_pdf_processing_types import Candidate, InvoiceNumbers, PaymentAmount, ProcessedPDFResult
7
  from remittance_pdf_processing_utils import remittance_logger, format_amount_str_to_decimal
 
8
 
 
 
 
 
 
 
9
 
10
  def custom_invoice_numbers_verifier(invoice_numbers: InvoiceNumbers, ground_truth_invoices: str) -> InvoiceNumbers:
11
- ground_truth_set = set(map(str.lower, map(str.strip, ground_truth_invoices.split(','))))
12
- return [num for num in invoice_numbers if num.lower() in ground_truth_set]
13
 
14
  def custom_invoice_and_amount_verifier(invoice_numbers: InvoiceNumbers, amount: PaymentAmount, ground_truth_invoices: str, ground_truth_amount: str) -> bool:
15
- return format_amount_str_to_decimal(amount) == format_amount_str_to_decimal(ground_truth_amount)
16
 
17
  def process_pdf_file(pdf_file_path: str, ground_truth_invoices: str, ground_truth_amount: str):
18
- try:
19
- invoice_verifier = lambda x: custom_invoice_numbers_verifier(x, ground_truth_invoices)
20
- invoice_and_amount_verifier = lambda x, y: custom_invoice_and_amount_verifier(x, y, ground_truth_invoices, ground_truth_amount)
 
 
 
 
 
 
21
 
22
- extracted_result: ProcessedPDFResult = process_pdf_with_flow(
23
- pdf_file_path,
24
- invoice_verifier=invoice_verifier,
25
- invoice_and_amount_verifier=invoice_and_amount_verifier
26
- )
27
-
28
- verified_candidate, unverified_candidate = extracted_result
29
 
30
- result = ""
31
- result += f"Extracted invoice numbers and payment amounts:\n"
32
- result += f"Ground Truth Invoices: {ground_truth_invoices}\n"
33
- result += f"Ground Truth Amount: {ground_truth_amount}\n"
34
- result += "-" * 40 + "\n"
 
 
35
 
36
- # Print Invoice Numbers
37
- result += "Invoice Numbers:\n"
38
- for i, invoice_numbers in enumerate(unverified_candidate[0], 1):
39
- status = "Verified" if any(set(invoice_numbers) == set(v) for v in verified_candidate[0]) else "Unverified"
40
- result += f" Candidate {i} ({status}): {', '.join(invoice_numbers)}\n"
41
- result += "\n"
42
 
43
- # Print Payment Amounts
44
- result += "Payment Amounts:\n"
45
- for i, amount in enumerate(unverified_candidate[1], 1):
46
- status = "Verified" if amount in verified_candidate[1] else "Unverified"
47
- result += f" Candidate {i} ({status}): {amount}\n"
 
48
 
49
- return result
50
- except Exception as e:
51
- error_message = f"An error occurred while processing the PDF: {str(e)}"
52
- remittance_logger.error(error_message, exc_info=True)
53
- return error_message
54
 
 
 
 
 
 
 
55
  # Create the Gradio interface
56
  with gr.Blocks() as iface:
57
- gr.Markdown("# Remittance PDF Processor")
58
- gr.Markdown("Upload a PDF file to extract invoice numbers and payment amounts. Provide ground truth data for comparison.")
59
-
60
- with gr.Row():
61
- with gr.Column(scale=1):
62
- pdf_input = PDF(label="Upload PDF", visible=True)
63
-
64
- with gr.Column(scale=1):
65
- ground_truth_invoices = gr.Textbox(label="Ground Truth Invoice Numbers (comma-separated)")
66
- ground_truth_amount = gr.Textbox(label="Ground Truth Payment Amount")
67
-
68
- submit_button = gr.Button("Process PDF")
69
-
70
- output = gr.Textbox(label="Processing Results", lines=20)
71
-
72
- submit_button.click(
73
- process_pdf_file,
74
- inputs=[pdf_input, ground_truth_invoices, ground_truth_amount],
75
- outputs=[output]
76
- )
77
 
78
  # Launch the interface
79
  iface.launch()
 
5
  from remittance_pdf_processor import process_pdf_with_flow
6
  from remittance_pdf_processing_types import Candidate, InvoiceNumbers, PaymentAmount, ProcessedPDFResult
7
  from remittance_pdf_processing_utils import remittance_logger, format_amount_str_to_decimal
8
+ from decimal import Decimal, InvalidOperation
9
 
def is_valid_decimal(s: str) -> bool:
    """Return True if *s* parses as a finite decimal number.

    ``Decimal`` also accepts the special values ``NaN``, ``sNaN`` and
    ``Infinity``; they are rejected here because they are not usable as an
    amount. Input that is not a string or number (for example ``None``) is
    reported as invalid instead of raising.

    Args:
        s: Text to check.

    Returns:
        True when ``Decimal(s)`` succeeds and the value is finite,
        otherwise False.
    """
    try:
        return Decimal(s).is_finite()
    except (InvalidOperation, TypeError, ValueError):
        # InvalidOperation: malformed text; TypeError/ValueError: unsupported input.
        return False
16
 
def custom_invoice_numbers_verifier(invoice_numbers: InvoiceNumbers, ground_truth_invoices: str) -> InvoiceNumbers:
    """Keep only the extracted invoice numbers that appear in the ground truth.

    Matching is case-insensitive and ignores surrounding whitespace on both
    sides. Empty tokens in the ground truth (from a trailing or doubled
    comma) are discarded so that an empty extracted value can never match.

    Args:
        invoice_numbers: Extracted invoice numbers (iterated as strings).
        ground_truth_invoices: Comma-separated expected invoice numbers.

    Returns:
        A list with the matching entries of ``invoice_numbers``, unchanged
        and in their original order.
    """
    ground_truth_set = {
        token.strip().lower()
        for token in ground_truth_invoices.split(',')
        if token.strip()
    }
    return [num for num in invoice_numbers if num.strip().lower() in ground_truth_set]
20
 
def custom_invoice_and_amount_verifier(invoice_numbers: InvoiceNumbers, amount: PaymentAmount, ground_truth_invoices: str, ground_truth_amount: str) -> bool:
    """Report whether the extracted amount equals the ground-truth amount.

    Both amounts are passed through ``format_amount_str_to_decimal`` before
    being compared. ``invoice_numbers`` and ``ground_truth_invoices`` are
    accepted but not consulted by this check.
    """
    extracted_value = format_amount_str_to_decimal(amount)
    expected_value = format_amount_str_to_decimal(ground_truth_amount)
    return extracted_value == expected_value
23
 
def _format_candidate_report(verified_candidate, unverified_candidate, ground_truth_invoices: str, ground_truth_amount: str) -> str:
    """Render the extraction outcome as the plain-text report shown in the UI."""
    # Index 0 holds invoice-number candidates, index 1 holds amount candidates.
    verified_invoice_sets = [set(v) for v in verified_candidate[0]]
    verified_amounts = verified_candidate[1]

    lines = [
        "Extracted invoice numbers and payment amounts:",
        f"Ground Truth Invoices: {ground_truth_invoices}",
        f"Ground Truth Amount: {ground_truth_amount}",
        "-" * 40,
        "Invoice Numbers:",
    ]
    for i, invoice_numbers in enumerate(unverified_candidate[0], 1):
        # A candidate counts as verified when it matches a verified one as a set.
        status = "Verified" if set(invoice_numbers) in verified_invoice_sets else "Unverified"
        lines.append(f" Candidate {i} ({status}): {', '.join(invoice_numbers)}")
    lines.append("")

    lines.append("Payment Amounts:")
    for i, amount in enumerate(unverified_candidate[1], 1):
        status = "Verified" if amount in verified_amounts else "Unverified"
        lines.append(f" Candidate {i} ({status}): {amount}")

    return "\n".join(lines) + "\n"


def process_pdf_file(pdf_file_path: str, ground_truth_invoices: str, ground_truth_amount: str):
    """Process a remittance PDF and report which candidates match the ground truth.

    Args:
        pdf_file_path: Path of the uploaded PDF; empty or ``None`` when
            nothing was uploaded.
        ground_truth_invoices: Comma-separated expected invoice numbers.
        ground_truth_amount: Expected payment amount as text.

    Returns:
        A text report listing every invoice-number and amount candidate with
        its verification status, or a message starting with ``Error:`` /
        ``An error occurred`` when validation or processing fails. This
        function does not raise for processing failures; they are logged and
        returned as text.
    """
    # Input validation
    if not pdf_file_path:
        return "Error: Please upload a PDF file."

    # Normalise once so validation, verification and the report all see the
    # same values; ``or ""`` keeps a missing field from raising on .strip().
    ground_truth_invoices = (ground_truth_invoices or "").strip()
    ground_truth_amount = (ground_truth_amount or "").strip()

    if not ground_truth_invoices:
        return "Error: Ground Truth Invoices field cannot be empty."

    if not ground_truth_amount:
        return "Error: Ground Truth Amount field cannot be empty."

    if not is_valid_decimal(ground_truth_amount):
        return "Error: Ground Truth Amount must be a valid decimal number."

    try:
        invoice_verifier = lambda x: custom_invoice_numbers_verifier(x, ground_truth_invoices)
        invoice_and_amount_verifier = lambda x, y: custom_invoice_and_amount_verifier(x, y, ground_truth_invoices, ground_truth_amount)

        extracted_result: ProcessedPDFResult = process_pdf_with_flow(
            pdf_file_path,
            invoice_verifier=invoice_verifier,
            invoice_and_amount_verifier=invoice_and_amount_verifier
        )

        verified_candidate, unverified_candidate = extracted_result

        return _format_candidate_report(
            verified_candidate,
            unverified_candidate,
            ground_truth_invoices,
            ground_truth_amount,
        )
    except Exception as e:
        # UI boundary: log the full traceback, show the user a short message.
        error_message = f"An error occurred while processing the PDF: {str(e)}"
        remittance_logger.error(error_message, exc_info=True)
        return error_message
71
+
# Build the Gradio UI: PDF upload on the left, ground-truth fields on the
# right, then a submit button and a read-only results box underneath.
with gr.Blocks() as iface:
    gr.Markdown("# Remittance PDF Processor")
    gr.Markdown("Upload a PDF file to extract invoice numbers and payment amounts. Provide ground truth data for comparison.")

    with gr.Row():
        # Left column: the document to analyse.
        with gr.Column(scale=1):
            pdf_input = PDF(label="Upload PDF", visible=True)

        # Right column: expected values used for verification.
        with gr.Column(scale=1):
            ground_truth_invoices = gr.Textbox(label="Ground Truth Invoice Numbers (comma-separated)")
            ground_truth_amount = gr.Textbox(label="Ground Truth Payment Amount")

    submit_button = gr.Button("Process PDF")

    output = gr.Textbox(label="Processing Results", lines=20)

    # Clicking the button runs process_pdf_file and shows its text result.
    submit_button.click(
        fn=process_pdf_file,
        inputs=[pdf_input, ground_truth_invoices, ground_truth_amount],
        outputs=[output],
    )

# Start the app.
# NOTE(review): this runs whenever the module is executed or imported;
# consider an `if __name__ == "__main__":` guard if the module is ever imported.
iface.launch()