xelpmocAI committed on
Commit
807214f
·
verified ·
1 Parent(s): 1463298

modular and title center

Browse files
Files changed (1) hide show
  1. app.py +146 -81
app.py CHANGED
@@ -43,38 +43,20 @@ tax_deductions = '''Extract the following information in the given format:
43
  }
44
  '''
45
 
46
-
47
-
48
-
49
- def process_document(image):
50
- with tempfile.NamedTemporaryFile(delete=False, suffix=".jpg") as tmp_file:
51
- image = Image.fromarray(image)
52
- image.save(tmp_file.name)
53
- image_path = tmp_file.name
54
-
55
-
56
- messages = [
57
  {
58
  "role": "user",
59
  "content": [
60
  {
61
  "type": "image",
62
- "image": image_path,
63
  },
64
- {"type": "text", "text": '''Extract the following information in the given format:
65
- {
66
- 'tax_deductions': {
67
- 'federal:': {
68
- 'withholding tax:': {'Amount':'', 'Year-To_Date':""},
69
- 'ee social security tax:': {'Amount':'', 'Year-To_Date':""},
70
- 'ee medicare tax:': {'Amount':'', 'Year-To_Date':""}},
71
- 'california:': {
72
- 'withholding tax:': {'Amount':'', 'Year-To_Date':""},
73
- 'ee disability tax:': {'Amount':'', 'Year-To-Date':""}}},
74
- }'''},
75
  ],
76
  }
77
  ]
 
78
  text = processor.apply_chat_template(
79
  messages, tokenize=False, add_generation_prompt=True
80
  )
@@ -87,6 +69,7 @@ def process_document(image):
87
  return_tensors="pt",
88
  )
89
  inputs = inputs.to("cuda")
 
90
  generated_ids = model.generate(**inputs, max_new_tokens=1500)
91
  generated_ids_trimmed = [
92
  out_ids[len(in_ids) :] for in_ids, out_ids in zip(inputs.input_ids, generated_ids)
@@ -94,6 +77,8 @@ def process_document(image):
94
  output_text = processor.batch_decode(
95
  generated_ids_trimmed, skip_special_tokens=True, clean_up_tokenization_spaces=False
96
  )
 
 
97
  try:
98
  almost_json = output_text[0].split('```\n')[-1].split('\n```')[0]
99
 
@@ -105,67 +90,147 @@ def process_document(image):
105
  except:
106
  json = output_text[0]
107
 
108
- messages = [
109
- {
110
- "role": "user",
111
- "content": [
112
- {
113
- "type": "image",
114
- "image": image_path,
115
- },
116
- {"type": "text", "text": '''Extract the following information in the given format:
117
- {'other_benefits_and_information': {
118
- '401k eru: {'This Period':'', 'Year-to-Date':''}},
119
- 'quota summary':
120
- {
121
- 'sick:': '',
122
- 'vacation:': '',
123
- }
124
- 'payment method': 'eg. Direct payment',
125
- 'Amount': 'eg. 12.99'
126
- }'''},
127
- ],
128
- }
129
- ]
130
- text = processor.apply_chat_template(
131
- messages, tokenize=False, add_generation_prompt=True
132
- )
133
- image_inputs, video_inputs = process_vision_info(messages)
134
- inputs = processor(
135
- text=[text],
136
- images=image_inputs,
137
- videos=video_inputs,
138
- padding=True,
139
- return_tensors="pt",
140
- )
141
- inputs = inputs.to("cuda")
142
- # Inference: Generation of the output
143
- generated_ids = model.generate(**inputs, max_new_tokens=1500)
144
- generated_ids_trimmed = [
145
- out_ids[len(in_ids) :] for in_ids, out_ids in zip(inputs.input_ids, generated_ids)
146
- ]
147
- output_text = processor.batch_decode(
148
- generated_ids_trimmed, skip_special_tokens=True, clean_up_tokenization_spaces=False
149
- )
150
- try:
151
- almost_json_2 = output_text[0].split('```\n')[-1].split('\n```')[0]
152
 
153
- json_2 = literal_eval(almost_json_2)
154
- except:
155
- try:
156
- almost_json_2 = output_text[0].split('```json\n')[-1].split('\n```')[0]
157
- json_2 = literal_eval(almost_json_2)
158
- except:
159
- json_2 = output_text[0]
160
 
161
- # json_op = {
162
- # "tax_deductions": json,
163
- # "other_benifits": json_2
164
- # }
165
- # # Optionally, you can delete the temporary file after use
166
  os.remove(image_path)
167
 
168
- return json, json_2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
169
 
170
  # Create Gradio interface
171
  demo = gr.Interface(
@@ -175,7 +240,7 @@ demo = gr.Interface(
175
  gr.JSON(label="Tax Deductions Information"), # First output box with heading
176
  gr.JSON(label="Other Benefits and Information") # Second output box with heading
177
  ],
178
- title="PaySlip_Demo_Model",
179
  examples=[["Slip_1.jpg"], ["Slip_2.jpg"]],
180
  cache_examples=False
181
  )
 
43
  }
44
  '''
45
 
46
+ def process_function(image_path, prompt):
47
+ messages = [
 
 
 
 
 
 
 
 
 
48
  {
49
  "role": "user",
50
  "content": [
51
  {
52
  "type": "image",
53
+ "image": image_path, # Use the file path here
54
  },
55
+ {"type": "text", "text": prompt},
 
 
 
 
 
 
 
 
 
 
56
  ],
57
  }
58
  ]
59
+ # Preparation for inference
60
  text = processor.apply_chat_template(
61
  messages, tokenize=False, add_generation_prompt=True
62
  )
 
69
  return_tensors="pt",
70
  )
71
  inputs = inputs.to("cuda")
72
+ # Inference: Generation of the output
73
  generated_ids = model.generate(**inputs, max_new_tokens=1500)
74
  generated_ids_trimmed = [
75
  out_ids[len(in_ids) :] for in_ids, out_ids in zip(inputs.input_ids, generated_ids)
 
77
  output_text = processor.batch_decode(
78
  generated_ids_trimmed, skip_special_tokens=True, clean_up_tokenization_spaces=False
79
  )
80
+
81
+ # Handle output text to convert it into JSON
82
  try:
83
  almost_json = output_text[0].split('```\n')[-1].split('\n```')[0]
84
 
 
90
  except:
91
  json = output_text[0]
92
 
93
+ return json
94
+
95
+ def process_document(image):
96
+ # Save the uploaded image to a temporary file
97
+ with tempfile.NamedTemporaryFile(delete=False, suffix=".jpg") as tmp_file:
98
+ image = Image.fromarray(image) # Convert NumPy array to PIL Image
99
+ image.save(tmp_file.name) # Save the image to the temporary file
100
+ image_path = tmp_file.name # Get the path of the saved file
101
+
102
+ # Process the image with your model
103
+ one = process_function(image_path, other_benifits)
104
+ two = process_function(image_path, tax_deductions)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
105
 
 
 
 
 
 
 
 
106
 
107
+ # Optionally, you can delete the temporary file after use
 
 
 
 
108
  os.remove(image_path)
109
 
110
+ return one, two
111
+
112
+
113
+
114
+ # def process_document(image):
115
+ # with tempfile.NamedTemporaryFile(delete=False, suffix=".jpg") as tmp_file:
116
+ # image = Image.fromarray(image)
117
+ # image.save(tmp_file.name)
118
+ # image_path = tmp_file.name
119
+
120
+
121
+ # messages = [
122
+ # {
123
+ # "role": "user",
124
+ # "content": [
125
+ # {
126
+ # "type": "image",
127
+ # "image": image_path,
128
+ # },
129
+ # {"type": "text", "text": '''Extract the following information in the given format:
130
+ # {
131
+ # 'tax_deductions': {
132
+ # 'federal:': {
133
+ # 'withholding tax:': {'Amount':'', 'Year-To_Date':""},
134
+ # 'ee social security tax:': {'Amount':'', 'Year-To_Date':""},
135
+ # 'ee medicare tax:': {'Amount':'', 'Year-To_Date':""}},
136
+ # 'california:': {
137
+ # 'withholding tax:': {'Amount':'', 'Year-To_Date':""},
138
+ # 'ee disability tax:': {'Amount':'', 'Year-To-Date':""}}},
139
+ # }'''},
140
+ # ],
141
+ # }
142
+ # ]
143
+ # text = processor.apply_chat_template(
144
+ # messages, tokenize=False, add_generation_prompt=True
145
+ # )
146
+ # image_inputs, video_inputs = process_vision_info(messages)
147
+ # inputs = processor(
148
+ # text=[text],
149
+ # images=image_inputs,
150
+ # videos=video_inputs,
151
+ # padding=True,
152
+ # return_tensors="pt",
153
+ # )
154
+ # inputs = inputs.to("cuda")
155
+ # generated_ids = model.generate(**inputs, max_new_tokens=1500)
156
+ # generated_ids_trimmed = [
157
+ # out_ids[len(in_ids) :] for in_ids, out_ids in zip(inputs.input_ids, generated_ids)
158
+ # ]
159
+ # output_text = processor.batch_decode(
160
+ # generated_ids_trimmed, skip_special_tokens=True, clean_up_tokenization_spaces=False
161
+ # )
162
+ # try:
163
+ # almost_json = output_text[0].split('```\n')[-1].split('\n```')[0]
164
+
165
+ # json = literal_eval(almost_json)
166
+ # except:
167
+ # try:
168
+ # almost_json = output_text[0].split('```json\n')[-1].split('\n```')[0]
169
+ # json = literal_eval(almost_json)
170
+ # except:
171
+ # json = output_text[0]
172
+
173
+ # messages = [
174
+ # {
175
+ # "role": "user",
176
+ # "content": [
177
+ # {
178
+ # "type": "image",
179
+ # "image": image_path,
180
+ # },
181
+ # {"type": "text", "text": '''Extract the following information in the given format:
182
+ # {'other_benefits_and_information': {
183
+ # '401k eru: {'This Period':'', 'Year-to-Date':''}},
184
+ # 'quota summary':
185
+ # {
186
+ # 'sick:': '',
187
+ # 'vacation:': '',
188
+ # }
189
+ # 'payment method': 'eg. Direct payment',
190
+ # 'Amount': 'eg. 12.99'
191
+ # }'''},
192
+ # ],
193
+ # }
194
+ # ]
195
+ # text = processor.apply_chat_template(
196
+ # messages, tokenize=False, add_generation_prompt=True
197
+ # )
198
+ # image_inputs, video_inputs = process_vision_info(messages)
199
+ # inputs = processor(
200
+ # text=[text],
201
+ # images=image_inputs,
202
+ # videos=video_inputs,
203
+ # padding=True,
204
+ # return_tensors="pt",
205
+ # )
206
+ # inputs = inputs.to("cuda")
207
+ # # Inference: Generation of the output
208
+ # generated_ids = model.generate(**inputs, max_new_tokens=1500)
209
+ # generated_ids_trimmed = [
210
+ # out_ids[len(in_ids) :] for in_ids, out_ids in zip(inputs.input_ids, generated_ids)
211
+ # ]
212
+ # output_text = processor.batch_decode(
213
+ # generated_ids_trimmed, skip_special_tokens=True, clean_up_tokenization_spaces=False
214
+ # )
215
+ # try:
216
+ # almost_json_2 = output_text[0].split('```\n')[-1].split('\n```')[0]
217
+
218
+ # json_2 = literal_eval(almost_json_2)
219
+ # except:
220
+ # try:
221
+ # almost_json_2 = output_text[0].split('```json\n')[-1].split('\n```')[0]
222
+ # json_2 = literal_eval(almost_json_2)
223
+ # except:
224
+ # json_2 = output_text[0]
225
+
226
+ # # json_op = {
227
+ # # "tax_deductions": json,
228
+ # # "other_benifits": json_2
229
+ # # }
230
+ # # # Optionally, you can delete the temporary file after use
231
+ # os.remove(image_path)
232
+
233
+ # return json, json_2
234
 
235
  # Create Gradio interface
236
  demo = gr.Interface(
 
240
  gr.JSON(label="Tax Deductions Information"), # First output box with heading
241
  gr.JSON(label="Other Benefits and Information") # Second output box with heading
242
  ],
243
+ title="<div style='text-align: center;'>Information Extraction From PaySlip</div>",
244
  examples=[["Slip_1.jpg"], ["Slip_2.jpg"]],
245
  cache_examples=False
246
  )