PaySlip_Demo

Paused

App Files Files Community

xelpmocAI committed on Oct 22, 2024

Commit

40a85e4

verified ·

1 Parent(s): 4a7bfda

working op ost prob

Browse files

Files changed (1) hide show

app.py +89 -43

app.py CHANGED Viewed

@@ -8,9 +8,6 @@ import torch
 from ast import literal_eval
 from PIL import Image
-import logging
-logging.basicConfig(level=logging.INFO)
 # Load the model on the available device(s)
 model = Qwen2VLForConditionalGeneration.from_pretrained(
     "Qwen/Qwen2-VL-7B-Instruct", torch_dtype="auto", device_map="auto"
@@ -46,8 +43,18 @@ tax_deductions = '''Extract the following information in the given format:
                 }
 '''
-def demo(image_path, prompt):
-    messages = [
         {
             "role": "user",
             "content": [
@@ -55,18 +62,25 @@ def demo(image_path, prompt):
                     "type": "image",
                     "image": image_path,  # Use the file path here
                 },
-                {"type": "text", "text":  prompt},
             ],
         }
     ]
-    logging.info("Step 1: Preparing inference")
-    # Preparation for inference
     text = processor.apply_chat_template(
         messages, tokenize=False, add_generation_prompt=True
     )
-    logging.info("2")
     image_inputs, video_inputs = process_vision_info(messages)
-    logging.info("3")
     inputs = processor(
         text=[text],
         images=image_inputs,
@@ -74,60 +88,92 @@ def demo(image_path, prompt):
         padding=True,
         return_tensors="pt",
     )
-    logging.info("4")
     inputs = inputs.to("cuda")
-    logging.info("5")
     # Inference: Generation of the output
     generated_ids = model.generate(**inputs, max_new_tokens=1500)
-    logging.info("6")
     generated_ids_trimmed = [
         out_ids[len(in_ids) :] for in_ids, out_ids in zip(inputs.input_ids, generated_ids)
     ]
-    logging.info("7")
     output_text = processor.batch_decode(
         generated_ids_trimmed, skip_special_tokens=True, clean_up_tokenization_spaces=False
     )
-    logging.info("8", output_text)
     # Handle output text to convert it into JSON
     try:
-        almost_json = output_text[0].split('\n')[-1].split('\n')[0]
         json = literal_eval(almost_json)
     except:
-        json = output_text[0]  # Return raw output if JSON parsing fails
-    return json
-def process_document(image):
-    # Log the function start
-    logging.info("Started processing the document")
-    # Save the uploaded image to a temporary file
-    with tempfile.NamedTemporaryFile(delete=False, suffix=".jpg") as tmp_file:
-        image = Image.fromarray(image)  # Convert NumPy array to PIL Image
-        image.save(tmp_file.name)  # Save the image to the temporary file
-        image_path = tmp_file.name  # Get the path of the saved file
-    logging.info("The path made for image: %s", image_path)
-    # Process the image with your model
-    one = demo(image_path, other_benifits)
-    logging.info("Processed the image for other benefits")
-    two = demo(image_path, tax_deductions)
-    logging.info("Processed the image for tax deductions")
     json_op = {
-        "tax_deductions": one,
-        "other_benifits": two
     }
-    # Optionally, delete the temporary file after use
     os.remove(image_path)
-    logging.info("Temporary file deleted: %s", image_path)
     return json_op
 # Create Gradio interface
 demo = gr.Interface(
     fn=process_document,

 from ast import literal_eval
 from PIL import Image
 # Load the model on the available device(s)
 model = Qwen2VLForConditionalGeneration.from_pretrained(
     "Qwen/Qwen2-VL-7B-Instruct", torch_dtype="auto", device_map="auto"
                 }
 '''
+def process_document(image):
+    # Save the uploaded image to a temporary file
+    with tempfile.NamedTemporaryFile(delete=False, suffix=".jpg") as tmp_file:
+        image = Image.fromarray(image)  # Convert NumPy array to PIL Image
+        image.save(tmp_file.name)  # Save the image to the temporary file
+        image_path = tmp_file.name  # Get the path of the saved file
+        messages = [
         {
             "role": "user",
             "content": [
                     "type": "image",
                     "image": image_path,  # Use the file path here
                 },
+                {"type": "text", "text":  '''Extract the following information in the given format:
+                {
+                'tax_deductions': {
+                    'federal:': {
+                        'withholding tax:': {'Amount':'', 'Year-To_Date':""},
+                        'ee social security tax:': {'Amount':'', 'Year-To_Date':""},
+                        'ee medicare tax:': {'Amount':'', 'Year-To_Date':""}},
+                    'california:': {
+                        'withholding tax:': {'Amount':'', 'Year-To_Date':""},
+                        'ee disability tax:': {'Amount':'', 'Year-To-Date':""}}},
+                }
+'''},
             ],
         }
     ]
     text = processor.apply_chat_template(
         messages, tokenize=False, add_generation_prompt=True
     )
     image_inputs, video_inputs = process_vision_info(messages)
     inputs = processor(
         text=[text],
         images=image_inputs,
         padding=True,
         return_tensors="pt",
     )
     inputs = inputs.to("cuda")
     # Inference: Generation of the output
     generated_ids = model.generate(**inputs, max_new_tokens=1500)
     generated_ids_trimmed = [
         out_ids[len(in_ids) :] for in_ids, out_ids in zip(inputs.input_ids, generated_ids)
     ]
     output_text = processor.batch_decode(
         generated_ids_trimmed, skip_special_tokens=True, clean_up_tokenization_spaces=False
     )
     # Handle output text to convert it into JSON
     try:
+        # almost_json = output_text[0].replace('```\n', '').replace('\n```', '')
+        almost_json = output_text[0].split('```\n')[-1].split('\n```')[0]
         json = literal_eval(almost_json)
     except:
+        try:
+            # almost_json = output_text[0].replace('```json\n', '').replace('\n```', '')
+            almost_json = output_text[0].split('```json\n')[-1].split('\n```')[0]
+            json = literal_eval(almost_json)
+        except:
+            json = output_text[0]
+        messages = [
+        {
+            "role": "user",
+            "content": [
+                {
+                    "type": "image",
+                    "image": image_path,  # Use the file path here
+                },
+                {"type": "text", "text":  '''Extract the following information in the given format:
+        {'other_benefits_and_information': {
+            '401k eru: {'This Period':'', 'Year-to-Date':''}},
+            'quota summary':
+                            {
+                            'sick:': '',
+                            'vacation:': '',
+                            }
+            'payment method': 'eg. Direct payment',
+            'Amount': 'eg. 12.99'
+                            }
+'''},
+            ],
+        }
+    ]
+    text = processor.apply_chat_template(
+        messages, tokenize=False, add_generation_prompt=True
+    )
+    image_inputs, video_inputs = process_vision_info(messages)
+    inputs = processor(
+        text=[text],
+        images=image_inputs,
+        videos=video_inputs,
+        padding=True,
+        return_tensors="pt",
+    )
+    inputs = inputs.to("cuda")
+    # Inference: Generation of the output
+    generated_ids = model.generate(**inputs, max_new_tokens=1500)
+    generated_ids_trimmed = [
+        out_ids[len(in_ids) :] for in_ids, out_ids in zip(inputs.input_ids, generated_ids)
+    ]
+    output_text = processor.batch_decode(
+        generated_ids_trimmed, skip_special_tokens=True, clean_up_tokenization_spaces=False
+    )
+    try:
+        almost_json_2 = output_text[0].split('```\n')[-1].split('\n```')[0]
+        json_2 = literal_eval(almost_json_2)
+    except:
+        try:
+            almost_json_2 = output_text[0].split('```json\n')[-1].split('\n```')[0]
+            json_2 = literal_eval(almost_json_2)
+        except:
+            json_2 = output_text[0]
     json_op = {
+        "tax_deductions": json,
+        "other_benifits": json_2
     }
+    # Optionally, you can delete the temporary file after use
     os.remove(image_path)
     return json_op
 # Create Gradio interface
 demo = gr.Interface(
     fn=process_document,