# Hugging Face Space (status: paused)
import os | |
import tempfile | |
import gradio as gr | |
import numpy as np | |
from transformers import Qwen2VLForConditionalGeneration, AutoProcessor | |
from qwen_vl_utils import process_vision_info | |
import torch | |
from ast import literal_eval | |
from PIL import Image | |
# Single checkpoint identifier shared by the model weights and its processor.
_CHECKPOINT = "Qwen/Qwen2-VL-7B-Instruct"

# Instantiate the vision-language model; dtype and device placement are both
# left to the library via the "auto" settings.
model = Qwen2VLForConditionalGeneration.from_pretrained(
    _CHECKPOINT,
    torch_dtype="auto",
    device_map="auto",
)

# Processor belonging to the same checkpoint; used below for chat templating,
# input preparation and decoding.
processor = AutoProcessor.from_pretrained(_CHECKPOINT)
# Prompt templates sent to the model together with the payslip image.
#
# The model's reply is parsed with ``literal_eval``, so the template itself is
# kept a syntactically valid Python dict literal: a reply that mirrors the
# template can then be parsed instead of falling back to raw text.
# (The variable name keeps its historical spelling because other code in this
# file refers to it.)
other_benifits = '''Extract the following information in the given format:
{'other_benefits_and_information': {
'401k eru:': {'This Period':'', 'Year-to-Date':''}},
'quota summary':
{
'sick:': '',
'vacation:': '',
},
'payment method': 'eg. Direct payment',
'Amount': 'eg. 12.99'
}
'''
# Prompt requesting the tax-deduction figures, grouped into 'federal:' and
# 'california:' sections. Each adjacent literal below is one line of the
# prompt text.
# NOTE(review): the last entry spells its key 'Year-To-Date' while the other
# four use 'Year-To_Date'; kept as-is because the model sees this text verbatim.
tax_deductions = (
    "Extract the following information in the given format:\n"
    "{\n"
    "'tax_deductions': {\n"
    "'federal:': {\n"
    "'withholding tax:': {'Amount':'', 'Year-To_Date':\"\"},\n"
    "'ee social security tax:': {'Amount':'', 'Year-To_Date':\"\"},\n"
    "'ee medicare tax:': {'Amount':'', 'Year-To_Date':\"\"}},\n"
    "'california:': {\n"
    "'withholding tax:': {'Amount':'', 'Year-To_Date':\"\"},\n"
    "'ee disability tax:': {'Amount':'', 'Year-To-Date':\"\"}}},\n"
    "}\n"
)
def _parse_model_output(raw_text):
    """Turn the model's textual reply into a Python object when possible.

    The contents of the last Markdown code fence are used if the reply
    contains one (an unterminated fence is accepted too); otherwise the whole
    reply is used. The candidate text is parsed first as a Python literal and
    then as JSON.

    Args:
        raw_text: Decoded reply text produced by the model.

    Returns:
        The parsed object, or ``raw_text`` unchanged when it cannot be parsed.
    """
    import json
    import re

    # Opening fence with an optional info string ("json", "python", ...),
    # then everything up to the closing fence or the end of the text.
    fenced_blocks = re.findall(
        r"```[^\n`]*\n(.*?)(?:```|\Z)", raw_text, flags=re.DOTALL
    )
    candidate = (fenced_blocks[-1] if fenced_blocks else raw_text).strip()

    # literal_eval handles the single-quoted dict style used by the prompts;
    # json.loads additionally covers replies using true/false/null.
    for parse in (literal_eval, json.loads):
        try:
            return parse(candidate)
        except (ValueError, SyntaxError, TypeError, MemoryError, RecursionError):
            continue

    # Deliberate best-effort: hand back the raw reply rather than failing.
    return raw_text


def process_function(image_path, prompt):
    """Query the vision-language model with one image and one text prompt.

    Args:
        image_path: Path of the image file to send to the model.
        prompt: Instruction text sent alongside the image.

    Returns:
        The model's reply parsed into a Python object, or the raw reply text
        if it could not be parsed.
    """
    messages = [
        {
            "role": "user",
            "content": [
                {"type": "image", "image": image_path},
                {"type": "text", "text": prompt},
            ],
        }
    ]

    # Preparation for inference
    chat_text = processor.apply_chat_template(
        messages, tokenize=False, add_generation_prompt=True
    )
    image_inputs, video_inputs = process_vision_info(messages)
    inputs = processor(
        text=[chat_text],
        images=image_inputs,
        videos=video_inputs,
        padding=True,
        return_tensors="pt",
    )
    # Follow the device the model was actually placed on instead of assuming
    # CUDA, since the model is loaded with automatic device placement.
    inputs = inputs.to(model.device)

    # Inference: generate, then drop the echoed prompt tokens from each output
    generated_ids = model.generate(**inputs, max_new_tokens=1500)
    new_token_ids = [
        out_ids[len(in_ids):]
        for in_ids, out_ids in zip(inputs.input_ids, generated_ids)
    ]
    output_text = processor.batch_decode(
        new_token_ids,
        skip_special_tokens=True,
        clean_up_tokenization_spaces=False,
    )

    return _parse_model_output(output_text[0])
def process_document(image):
    """Extract tax-deduction and other-benefit details from a payslip image.

    Args:
        image: The uploaded image as a NumPy array, as delivered by the
            Gradio "image" input.

    Returns:
        A ``(tax_deductions_info, other_benefits_info)`` tuple, in the same
        order as the interface's output components ("Tax Deductions
        Information" first, "Other Benefits and Information" second).

    Raises:
        gr.Error: If no image was supplied.
    """
    if image is None:
        raise gr.Error("Please upload a payslip image first.")

    # Reserve a temporary path and close the handle straight away, so the
    # image is not written to a file that is still open.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".jpg") as tmp_file:
        image_path = tmp_file.name

    try:
        # JPEG cannot store alpha or palette modes, so normalise to RGB first.
        Image.fromarray(image).convert("RGB").save(image_path)

        # Process the image with the model, once per prompt
        other_benefits_info = process_function(image_path, other_benifits)
        tax_deductions_info = process_function(image_path, tax_deductions)
    finally:
        # Always remove the temporary file, even when inference fails.
        os.remove(image_path)

    return tax_deductions_info, other_benefits_info
# def process_document(image): | |
# with tempfile.NamedTemporaryFile(delete=False, suffix=".jpg") as tmp_file: | |
# image = Image.fromarray(image) | |
# image.save(tmp_file.name) | |
# image_path = tmp_file.name | |
# messages = [ | |
# { | |
# "role": "user", | |
# "content": [ | |
# { | |
# "type": "image", | |
# "image": image_path, | |
# }, | |
# {"type": "text", "text": '''Extract the following information in the given format: | |
# { | |
# 'tax_deductions': { | |
# 'federal:': { | |
# 'withholding tax:': {'Amount':'', 'Year-To_Date':""}, | |
# 'ee social security tax:': {'Amount':'', 'Year-To_Date':""}, | |
# 'ee medicare tax:': {'Amount':'', 'Year-To_Date':""}}, | |
# 'california:': { | |
# 'withholding tax:': {'Amount':'', 'Year-To_Date':""}, | |
# 'ee disability tax:': {'Amount':'', 'Year-To-Date':""}}}, | |
# }'''}, | |
# ], | |
# } | |
# ] | |
# text = processor.apply_chat_template( | |
# messages, tokenize=False, add_generation_prompt=True | |
# ) | |
# image_inputs, video_inputs = process_vision_info(messages) | |
# inputs = processor( | |
# text=[text], | |
# images=image_inputs, | |
# videos=video_inputs, | |
# padding=True, | |
# return_tensors="pt", | |
# ) | |
# inputs = inputs.to("cuda") | |
# generated_ids = model.generate(**inputs, max_new_tokens=1500) | |
# generated_ids_trimmed = [ | |
# out_ids[len(in_ids) :] for in_ids, out_ids in zip(inputs.input_ids, generated_ids) | |
# ] | |
# output_text = processor.batch_decode( | |
# generated_ids_trimmed, skip_special_tokens=True, clean_up_tokenization_spaces=False | |
# ) | |
# try: | |
# almost_json = output_text[0].split('```\n')[-1].split('\n```')[0] | |
# json = literal_eval(almost_json) | |
# except: | |
# try: | |
# almost_json = output_text[0].split('```json\n')[-1].split('\n```')[0] | |
# json = literal_eval(almost_json) | |
# except: | |
# json = output_text[0] | |
# messages = [ | |
# { | |
# "role": "user", | |
# "content": [ | |
# { | |
# "type": "image", | |
# "image": image_path, | |
# }, | |
# {"type": "text", "text": '''Extract the following information in the given format: | |
# {'other_benefits_and_information': { | |
# '401k eru: {'This Period':'', 'Year-to-Date':''}}, | |
# 'quota summary': | |
# { | |
# 'sick:': '', | |
# 'vacation:': '', | |
# } | |
# 'payment method': 'eg. Direct payment', | |
# 'Amount': 'eg. 12.99' | |
# }'''}, | |
# ], | |
# } | |
# ] | |
# text = processor.apply_chat_template( | |
# messages, tokenize=False, add_generation_prompt=True | |
# ) | |
# image_inputs, video_inputs = process_vision_info(messages) | |
# inputs = processor( | |
# text=[text], | |
# images=image_inputs, | |
# videos=video_inputs, | |
# padding=True, | |
# return_tensors="pt", | |
# ) | |
# inputs = inputs.to("cuda") | |
# # Inference: Generation of the output | |
# generated_ids = model.generate(**inputs, max_new_tokens=1500) | |
# generated_ids_trimmed = [ | |
# out_ids[len(in_ids) :] for in_ids, out_ids in zip(inputs.input_ids, generated_ids) | |
# ] | |
# output_text = processor.batch_decode( | |
# generated_ids_trimmed, skip_special_tokens=True, clean_up_tokenization_spaces=False | |
# ) | |
# try: | |
# almost_json_2 = output_text[0].split('```\n')[-1].split('\n```')[0] | |
# json_2 = literal_eval(almost_json_2) | |
# except: | |
# try: | |
# almost_json_2 = output_text[0].split('```json\n')[-1].split('\n```')[0] | |
# json_2 = literal_eval(almost_json_2) | |
# except: | |
# json_2 = output_text[0] | |
# # json_op = { | |
# # "tax_deductions": json, | |
# # "other_benifits": json_2 | |
# # } | |
# # # Optionally, you can delete the temporary file after use | |
# os.remove(image_path) | |
# return json, json_2 | |
# Output panels, in the order the handler's return values are displayed.
_output_panels = [
    gr.JSON(label="Tax Deductions Information"),
    gr.JSON(label="Other Benefits and Information"),
]

# Sample payslips offered in the UI; example caching is disabled below.
_sample_slips = [["Slip_1.jpg"], ["Slip_2.jpg"]]

# Assemble the web UI: one image input feeding process_document.
demo = gr.Interface(
    fn=process_document,
    inputs="image",
    outputs=_output_panels,
    title="<div style='text-align: center;'>Information Extraction From PaySlip</div>",
    examples=_sample_slips,
    cache_examples=False,
)

# Start serving the app.
demo.launch()