xelpmocAI committed on
Commit
6082f24
·
verified ·
1 Parent(s): bb83a45
Files changed (1) hide show
  1. app.py +18 -34
app.py CHANGED
@@ -7,17 +7,15 @@ import torch
7
  from ast import literal_eval
8
  from PIL import Image
9
 
10
-
11
- # default: Load the model on the available device(s)
12
  model = Qwen2VLForConditionalGeneration.from_pretrained(
13
  "Qwen/Qwen2-VL-7B-Instruct", torch_dtype="auto", device_map="auto"
14
  )
15
 
16
- # default processer
17
  processor = AutoProcessor.from_pretrained("Qwen/Qwen2-VL-7B-Instruct")
18
 
19
-
20
-
21
  other_benifits = '''Extract the following information in the given format:
22
  {'other_benefits_and_information': {
23
  '401k eru: {'This Period':'', 'Year-to-Date':''}},
@@ -44,10 +42,10 @@ tax_deductions = '''Extract the following information in the given format:
44
  }
45
  '''
46
 
47
- def demo(image_name, prompt):
 
 
48
 
49
- print("Inside Demo")
50
-
51
  messages = [
52
  {
53
  "role": "user",
@@ -61,8 +59,6 @@ def demo(image_name, prompt):
61
  }
62
  ]
63
 
64
- print(f"Formulated prompt template {messages}")
65
-
66
  # Preparation for inference
67
  text = processor.apply_chat_template(
68
  messages, tokenize=False, add_generation_prompt=True
@@ -85,45 +81,33 @@ def demo(image_name, prompt):
85
  output_text = processor.batch_decode(
86
  generated_ids_trimmed, skip_special_tokens=True, clean_up_tokenization_spaces=False
87
  )
 
 
88
  try:
89
- # almost_json = output_text[0].replace('```\n', '').replace('\n```', '')
90
- almost_json = output_text[0].split('```\n')[-1].split('\n```')[0]
91
-
92
  json = literal_eval(almost_json)
93
  except:
94
- try:
95
- # almost_json = output_text[0].replace('```json\n', '').replace('\n```', '')
96
- almost_json = output_text[0].split('```json\n')[-1].split('\n```')[0]
97
- json = literal_eval(almost_json)
98
- except:
99
- json = output_text[0]
100
  return json
101
 
102
  def process_document(image):
103
- print(f"Received Image --->>>>>> {image}")
104
- if isinstance(image, np.ndarray):
105
- print("Image is in Numpy array")
106
- image = Image.fromarray(image)
107
- print(type(image))
108
- print("Proceeding with the demo")
109
  one = demo(image, other_benifits)
110
  two = demo(image, tax_deductions)
111
  json_op = {
112
  "tax_deductions": one,
113
  "other_benifits": two
114
- }
115
  return json_op
116
 
117
- # article = "<p style='text-align: center'><a href='https://www.xelpmoc.in/' target='_blank'>Made by Xelpmoc</a></p>"
118
-
119
  demo = gr.Interface(
120
  fn=process_document,
121
- inputs=gr.Image(type="pil"),
122
  outputs="json",
123
  title="PaySlip_Demo_Model",
124
- # article=article,
125
- # enable_queue=True,
126
- examples=["Slip_1.jpg", "Slip_2.jpg"],
127
- cache_examples=False)
128
 
129
- demo.launch()
 
7
  from ast import literal_eval
8
  from PIL import Image
9
 
10
+ # Load the model on the available device(s)
 
11
  model = Qwen2VLForConditionalGeneration.from_pretrained(
12
  "Qwen/Qwen2-VL-7B-Instruct", torch_dtype="auto", device_map="auto"
13
  )
14
 
15
+ # Load the processor
16
  processor = AutoProcessor.from_pretrained("Qwen/Qwen2-VL-7B-Instruct")
17
 
18
+ # Define your prompts
 
19
  other_benifits = '''Extract the following information in the given format:
20
  {'other_benefits_and_information': {
21
  '401k eru: {'This Period':'', 'Year-to-Date':''}},
 
42
  }
43
  '''
44
 
45
+ def demo(image, prompt):
46
+ # Convert image to bytes or suitable format if needed
47
+ image_name = image # If you're directly using the image
48
 
 
 
49
  messages = [
50
  {
51
  "role": "user",
 
59
  }
60
  ]
61
 
 
 
62
  # Preparation for inference
63
  text = processor.apply_chat_template(
64
  messages, tokenize=False, add_generation_prompt=True
 
81
  output_text = processor.batch_decode(
82
  generated_ids_trimmed, skip_special_tokens=True, clean_up_tokenization_spaces=False
83
  )
84
+
85
+ # Handle output text to convert it into JSON
86
  try:
87
+ almost_json = output_text[0].split('\n')[-1].split('\n')[0]
 
 
88
  json = literal_eval(almost_json)
89
  except:
90
+ json = output_text[0] # Return raw output if JSON parsing fails
 
 
 
 
 
91
  return json
92
 
93
  def process_document(image):
94
+ # Pass the image directly
 
 
 
 
 
95
  one = demo(image, other_benifits)
96
  two = demo(image, tax_deductions)
97
  json_op = {
98
  "tax_deductions": one,
99
  "other_benifits": two
100
+ }
101
  return json_op
102
 
103
+ # Create Gradio interface
 
104
  demo = gr.Interface(
105
  fn=process_document,
106
+ inputs="image", # Gradio will handle the image input
107
  outputs="json",
108
  title="PaySlip_Demo_Model",
109
+ examples=[["Slip_1.jpg"], ["Slip_2.jpg"]],
110
+ cache_examples=False
111
+ )
 
112
 
113
+ demo.launch()