xelpmocAI committed on
Commit
965cbb7
·
verified ·
1 Parent(s): 807214f

text output

Browse files
Files changed (1) hide show
  1. app.py +20 -124
app.py CHANGED
@@ -7,6 +7,7 @@ from qwen_vl_utils import process_vision_info
7
  import torch
8
  from ast import literal_eval
9
  from PIL import Image
 
10
 
11
  # Load the model on the available device(s)
12
  model = Qwen2VLForConditionalGeneration.from_pretrained(
@@ -43,6 +44,20 @@ tax_deductions = '''Extract the following information in the given format:
43
  }
44
  '''
45
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
46
  def process_function(image_path, prompt):
47
  messages = [
48
  {
@@ -100,8 +115,8 @@ def process_document(image):
100
  image_path = tmp_file.name # Get the path of the saved file
101
 
102
  # Process the image with your model
103
- one = process_function(image_path, other_benifits)
104
- two = process_function(image_path, tax_deductions)
105
 
106
 
107
  # Optionally, you can delete the temporary file after use
@@ -111,134 +126,15 @@ def process_document(image):
111
 
112
 
113
 
114
- # def process_document(image):
115
- # with tempfile.NamedTemporaryFile(delete=False, suffix=".jpg") as tmp_file:
116
- # image = Image.fromarray(image)
117
- # image.save(tmp_file.name)
118
- # image_path = tmp_file.name
119
-
120
-
121
- # messages = [
122
- # {
123
- # "role": "user",
124
- # "content": [
125
- # {
126
- # "type": "image",
127
- # "image": image_path,
128
- # },
129
- # {"type": "text", "text": '''Extract the following information in the given format:
130
- # {
131
- # 'tax_deductions': {
132
- # 'federal:': {
133
- # 'withholding tax:': {'Amount':'', 'Year-To_Date':""},
134
- # 'ee social security tax:': {'Amount':'', 'Year-To_Date':""},
135
- # 'ee medicare tax:': {'Amount':'', 'Year-To_Date':""}},
136
- # 'california:': {
137
- # 'withholding tax:': {'Amount':'', 'Year-To_Date':""},
138
- # 'ee disability tax:': {'Amount':'', 'Year-To-Date':""}}},
139
- # }'''},
140
- # ],
141
- # }
142
- # ]
143
- # text = processor.apply_chat_template(
144
- # messages, tokenize=False, add_generation_prompt=True
145
- # )
146
- # image_inputs, video_inputs = process_vision_info(messages)
147
- # inputs = processor(
148
- # text=[text],
149
- # images=image_inputs,
150
- # videos=video_inputs,
151
- # padding=True,
152
- # return_tensors="pt",
153
- # )
154
- # inputs = inputs.to("cuda")
155
- # generated_ids = model.generate(**inputs, max_new_tokens=1500)
156
- # generated_ids_trimmed = [
157
- # out_ids[len(in_ids) :] for in_ids, out_ids in zip(inputs.input_ids, generated_ids)
158
- # ]
159
- # output_text = processor.batch_decode(
160
- # generated_ids_trimmed, skip_special_tokens=True, clean_up_tokenization_spaces=False
161
- # )
162
- # try:
163
- # almost_json = output_text[0].split('```\n')[-1].split('\n```')[0]
164
-
165
- # json = literal_eval(almost_json)
166
- # except:
167
- # try:
168
- # almost_json = output_text[0].split('```json\n')[-1].split('\n```')[0]
169
- # json = literal_eval(almost_json)
170
- # except:
171
- # json = output_text[0]
172
-
173
- # messages = [
174
- # {
175
- # "role": "user",
176
- # "content": [
177
- # {
178
- # "type": "image",
179
- # "image": image_path,
180
- # },
181
- # {"type": "text", "text": '''Extract the following information in the given format:
182
- # {'other_benefits_and_information': {
183
- # '401k eru: {'This Period':'', 'Year-to-Date':''}},
184
- # 'quota summary':
185
- # {
186
- # 'sick:': '',
187
- # 'vacation:': '',
188
- # }
189
- # 'payment method': 'eg. Direct payment',
190
- # 'Amount': 'eg. 12.99'
191
- # }'''},
192
- # ],
193
- # }
194
- # ]
195
- # text = processor.apply_chat_template(
196
- # messages, tokenize=False, add_generation_prompt=True
197
- # )
198
- # image_inputs, video_inputs = process_vision_info(messages)
199
- # inputs = processor(
200
- # text=[text],
201
- # images=image_inputs,
202
- # videos=video_inputs,
203
- # padding=True,
204
- # return_tensors="pt",
205
- # )
206
- # inputs = inputs.to("cuda")
207
- # # Inference: Generation of the output
208
- # generated_ids = model.generate(**inputs, max_new_tokens=1500)
209
- # generated_ids_trimmed = [
210
- # out_ids[len(in_ids) :] for in_ids, out_ids in zip(inputs.input_ids, generated_ids)
211
- # ]
212
- # output_text = processor.batch_decode(
213
- # generated_ids_trimmed, skip_special_tokens=True, clean_up_tokenization_spaces=False
214
- # )
215
- # try:
216
- # almost_json_2 = output_text[0].split('```\n')[-1].split('\n```')[0]
217
-
218
- # json_2 = literal_eval(almost_json_2)
219
- # except:
220
- # try:
221
- # almost_json_2 = output_text[0].split('```json\n')[-1].split('\n```')[0]
222
- # json_2 = literal_eval(almost_json_2)
223
- # except:
224
- # json_2 = output_text[0]
225
-
226
- # # json_op = {
227
- # # "tax_deductions": json,
228
- # # "other_benifits": json_2
229
- # # }
230
- # # # Optionally, you can delete the temporary file after use
231
- # os.remove(image_path)
232
-
233
- # return json, json_2
234
 
235
  # Create Gradio interface
236
  demo = gr.Interface(
237
  fn=process_document,
238
  inputs="image", # Gradio will handle the image input
239
  outputs=[
240
- gr.JSON(label="Tax Deductions Information"), # First output box with heading
241
- gr.JSON(label="Other Benefits and Information") # Second output box with heading
242
  ],
243
  title="<div style='text-align: center;'>Information Extraction From PaySlip</div>",
244
  examples=[["Slip_1.jpg"], ["Slip_2.jpg"]],
 
7
  import torch
8
  from ast import literal_eval
9
  from PIL import Image
10
+ import json
11
 
12
  # Load the model on the available device(s)
13
  model = Qwen2VLForConditionalGeneration.from_pretrained(
 
44
  }
45
  '''
46
 
47
def format_nested_dict(data, indent=0):
    """Render a nested dict as indented ``key: value`` text, one entry per line.

    Args:
        data: The mapping to render. A non-dict value is also accepted and is
            rendered as a single line (presumably the raw model text when the
            extraction could not be parsed into a dict -- confirm against
            ``process_function``).
        indent: Current nesting depth; each level adds one leading space.

    Returns:
        The formatted text. Every emitted line ends with a newline; an empty
        dict yields an empty string.
    """
    indent_str = " " * indent  # Indentation for the current level

    # Guard against non-dict input: calling .items() on a plain string would
    # raise AttributeError and break the whole request.
    if not isinstance(data, dict):
        return f"{indent_str}{data}\n"

    lines = []
    for key, value in data.items():
        if isinstance(value, dict):
            # Nested mapping: emit the key as a header, then recurse one
            # level deeper for its contents.
            lines.append(f"{indent_str}{key}:\n")
            lines.append(format_nested_dict(value, indent + 1))
        else:
            lines.append(f"{indent_str}{key}: {value}\n")

    # Join once instead of growing a string with repeated `+=`.
    return "".join(lines)
60
+
61
  def process_function(image_path, prompt):
62
  messages = [
63
  {
 
115
  image_path = tmp_file.name # Get the path of the saved file
116
 
117
  # Process the image with your model
118
+ one = format_nested_dict(process_function(image_path, other_benifits))
119
+ two = format_nested_dict(process_function(image_path, tax_deductions))
120
 
121
 
122
  # Optionally, you can delete the temporary file after use
 
126
 
127
 
128
 
129
+
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
130
 
131
  # Create Gradio interface
132
  demo = gr.Interface(
133
  fn=process_document,
134
  inputs="image", # Gradio will handle the image input
135
  outputs=[
136
+ gr.Textbox(label="Tax Deductions Information"), # First output box with heading
137
+ gr.Textbox(label="Other Benefits and Information") # Second output box with heading
138
  ],
139
  title="<div style='text-align: center;'>Information Extraction From PaySlip</div>",
140
  examples=[["Slip_1.jpg"], ["Slip_2.jpg"]],