PaySlip_Demo

Paused

App Files Community

xelpmocAI committed on Oct 22, 2024

Commit

807214f

verified ·

1 Parent(s): 1463298

Make processing modular and center the title

Browse files

Files changed (1) hide show

app.py +146 -81

app.py CHANGED Viewed

@@ -43,38 +43,20 @@ tax_deductions = '''Extract the following information in the given format:
                 }
 '''
-def process_document(image):
-    with tempfile.NamedTemporaryFile(delete=False, suffix=".jpg") as tmp_file:
-        image = Image.fromarray(image)
-        image.save(tmp_file.name)
-        image_path = tmp_file.name
-        messages = [
         {
             "role": "user",
             "content": [
                 {
                     "type": "image",
-                    "image": image_path,
                 },
-                {"type": "text", "text":  '''Extract the following information in the given format:
-                {
-                'tax_deductions': {
-                    'federal:': {
-                        'withholding tax:': {'Amount':'', 'Year-To_Date':""},
-                        'ee social security tax:': {'Amount':'', 'Year-To_Date':""},
-                        'ee medicare tax:': {'Amount':'', 'Year-To_Date':""}},
-                    'california:': {
-                        'withholding tax:': {'Amount':'', 'Year-To_Date':""},
-                        'ee disability tax:': {'Amount':'', 'Year-To-Date':""}}},
-                }'''},
             ],
         }
     ]
     text = processor.apply_chat_template(
         messages, tokenize=False, add_generation_prompt=True
     )
@@ -87,6 +69,7 @@ def process_document(image):
         return_tensors="pt",
     )
     inputs = inputs.to("cuda")
     generated_ids = model.generate(**inputs, max_new_tokens=1500)
     generated_ids_trimmed = [
         out_ids[len(in_ids) :] for in_ids, out_ids in zip(inputs.input_ids, generated_ids)
@@ -94,6 +77,8 @@ def process_document(image):
     output_text = processor.batch_decode(
         generated_ids_trimmed, skip_special_tokens=True, clean_up_tokenization_spaces=False
     )
     try:
         almost_json = output_text[0].split('```\n')[-1].split('\n```')[0]
@@ -105,67 +90,147 @@ def process_document(image):
         except:
             json = output_text[0]
-    messages = [
-        {
-            "role": "user",
-            "content": [
-                {
-                    "type": "image",
-                    "image": image_path,
-                },
-                {"type": "text", "text":  '''Extract the following information in the given format:
-        {'other_benefits_and_information': {
-            '401k eru: {'This Period':'', 'Year-to-Date':''}},
-            'quota summary':
-                            {
-                            'sick:': '',
-                            'vacation:': '',
-                            }
-            'payment method': 'eg. Direct payment',
-            'Amount': 'eg. 12.99'
-                            }'''},
-            ],
-        }
-    ]
-    text = processor.apply_chat_template(
-        messages, tokenize=False, add_generation_prompt=True
-    )
-    image_inputs, video_inputs = process_vision_info(messages)
-    inputs = processor(
-        text=[text],
-        images=image_inputs,
-        videos=video_inputs,
-        padding=True,
-        return_tensors="pt",
-    )
-    inputs = inputs.to("cuda")
-    # Inference: Generation of the output
-    generated_ids = model.generate(**inputs, max_new_tokens=1500)
-    generated_ids_trimmed = [
-        out_ids[len(in_ids) :] for in_ids, out_ids in zip(inputs.input_ids, generated_ids)
-    ]
-    output_text = processor.batch_decode(
-        generated_ids_trimmed, skip_special_tokens=True, clean_up_tokenization_spaces=False
-    )
-    try:
-        almost_json_2 = output_text[0].split('```\n')[-1].split('\n```')[0]
-        json_2 = literal_eval(almost_json_2)
-    except:
-        try:
-            almost_json_2 = output_text[0].split('```json\n')[-1].split('\n```')[0]
-            json_2 = literal_eval(almost_json_2)
-        except:
-            json_2 = output_text[0]
-    # json_op = {
-    #     "tax_deductions": json,
-    #     "other_benifits": json_2
-    # }
-    # # Optionally, you can delete the temporary file after use
     os.remove(image_path)
-    return json, json_2
 # Create Gradio interface
 demo = gr.Interface(
@@ -175,7 +240,7 @@ demo = gr.Interface(
         gr.JSON(label="Tax Deductions Information"),  # First output box with heading
         gr.JSON(label="Other Benefits and Information")  # Second output box with heading
     ],
-    title="PaySlip_Demo_Model",
     examples=[["Slip_1.jpg"], ["Slip_2.jpg"]],
     cache_examples=False
 )

                 }
 '''
+def process_function(image_path, prompt):
+    messages = [
         {
             "role": "user",
             "content": [
                 {
                     "type": "image",
+                    "image": image_path,  # Use the file path here
                 },
+                {"type": "text", "text":  prompt},
             ],
         }
     ]
+    # Preparation for inference
     text = processor.apply_chat_template(
         messages, tokenize=False, add_generation_prompt=True
     )
         return_tensors="pt",
     )
     inputs = inputs.to("cuda")
+    # Inference: Generation of the output
     generated_ids = model.generate(**inputs, max_new_tokens=1500)
     generated_ids_trimmed = [
         out_ids[len(in_ids) :] for in_ids, out_ids in zip(inputs.input_ids, generated_ids)
     output_text = processor.batch_decode(
         generated_ids_trimmed, skip_special_tokens=True, clean_up_tokenization_spaces=False
     )
+    # Handle output text to convert it into JSON
     try:
         almost_json = output_text[0].split('```\n')[-1].split('\n```')[0]
         except:
             json = output_text[0]
+    return json
+def process_document(image):
+    # Save the uploaded image to a temporary file
+    with tempfile.NamedTemporaryFile(delete=False, suffix=".jpg") as tmp_file:
+        image = Image.fromarray(image)  # Convert NumPy array to PIL Image
+        image.save(tmp_file.name)  # Save the image to the temporary file
+        image_path = tmp_file.name  # Get the path of the saved file
+    # Process the image with your model
+    one = process_function(image_path, other_benifits)
+    two = process_function(image_path, tax_deductions)
+    # Optionally, you can delete the temporary file after use
     os.remove(image_path)
+    return one, two
+# def process_document(image):
+#     with tempfile.NamedTemporaryFile(delete=False, suffix=".jpg") as tmp_file:
+#         image = Image.fromarray(image)
+#         image.save(tmp_file.name)
+#         image_path = tmp_file.name
+#         messages = [
+#         {
+#             "role": "user",
+#             "content": [
+#                 {
+#                     "type": "image",
+#                     "image": image_path,
+#                 },
+#                 {"type": "text", "text":  '''Extract the following information in the given format:
+#                 {
+#                 'tax_deductions': {
+#                     'federal:': {
+#                         'withholding tax:': {'Amount':'', 'Year-To_Date':""},
+#                         'ee social security tax:': {'Amount':'', 'Year-To_Date':""},
+#                         'ee medicare tax:': {'Amount':'', 'Year-To_Date':""}},
+#                     'california:': {
+#                         'withholding tax:': {'Amount':'', 'Year-To_Date':""},
+#                         'ee disability tax:': {'Amount':'', 'Year-To-Date':""}}},
+#                 }'''},
+#             ],
+#         }
+#     ]
+#     text = processor.apply_chat_template(
+#         messages, tokenize=False, add_generation_prompt=True
+#     )
+#     image_inputs, video_inputs = process_vision_info(messages)
+#     inputs = processor(
+#         text=[text],
+#         images=image_inputs,
+#         videos=video_inputs,
+#         padding=True,
+#         return_tensors="pt",
+#     )
+#     inputs = inputs.to("cuda")
+#     generated_ids = model.generate(**inputs, max_new_tokens=1500)
+#     generated_ids_trimmed = [
+#         out_ids[len(in_ids) :] for in_ids, out_ids in zip(inputs.input_ids, generated_ids)
+#     ]
+#     output_text = processor.batch_decode(
+#         generated_ids_trimmed, skip_special_tokens=True, clean_up_tokenization_spaces=False
+#     )
+#     try:
+#         almost_json = output_text[0].split('```\n')[-1].split('\n```')[0]
+#         json = literal_eval(almost_json)
+#     except:
+#         try:
+#             almost_json = output_text[0].split('```json\n')[-1].split('\n```')[0]
+#             json = literal_eval(almost_json)
+#         except:
+#             json = output_text[0]
+#     messages = [
+#         {
+#             "role": "user",
+#             "content": [
+#                 {
+#                     "type": "image",
+#                     "image": image_path,
+#                 },
+#                 {"type": "text", "text":  '''Extract the following information in the given format:
+#         {'other_benefits_and_information': {
+#             '401k eru: {'This Period':'', 'Year-to-Date':''}},
+#             'quota summary':
+#                             {
+#                             'sick:': '',
+#                             'vacation:': '',
+#                             }
+#             'payment method': 'eg. Direct payment',
+#             'Amount': 'eg. 12.99'
+#                             }'''},
+#             ],
+#         }
+#     ]
+#     text = processor.apply_chat_template(
+#         messages, tokenize=False, add_generation_prompt=True
+#     )
+#     image_inputs, video_inputs = process_vision_info(messages)
+#     inputs = processor(
+#         text=[text],
+#         images=image_inputs,
+#         videos=video_inputs,
+#         padding=True,
+#         return_tensors="pt",
+#     )
+#     inputs = inputs.to("cuda")
+#     # Inference: Generation of the output
+#     generated_ids = model.generate(**inputs, max_new_tokens=1500)
+#     generated_ids_trimmed = [
+#         out_ids[len(in_ids) :] for in_ids, out_ids in zip(inputs.input_ids, generated_ids)
+#     ]
+#     output_text = processor.batch_decode(
+#         generated_ids_trimmed, skip_special_tokens=True, clean_up_tokenization_spaces=False
+#     )
+#     try:
+#         almost_json_2 = output_text[0].split('```\n')[-1].split('\n```')[0]
+#         json_2 = literal_eval(almost_json_2)
+#     except:
+#         try:
+#             almost_json_2 = output_text[0].split('```json\n')[-1].split('\n```')[0]
+#             json_2 = literal_eval(almost_json_2)
+#         except:
+#             json_2 = output_text[0]
+#     # json_op = {
+#     #     "tax_deductions": json,
+#     #     "other_benifits": json_2
+#     # }
+#     # # Optionally, you can delete the temporary file after use
+#     os.remove(image_path)
+#     return json, json_2
 # Create Gradio interface
 demo = gr.Interface(
         gr.JSON(label="Tax Deductions Information"),  # First output box with heading
         gr.JSON(label="Other Benefits and Information")  # Second output box with heading
     ],
+    title="<div style='text-align: center;'>Information Extraction From PaySlip</div>",
     examples=[["Slip_1.jpg"], ["Slip_2.jpg"]],
     cache_examples=False
 )