xelpmocAI committed on
Commit 40a85e4 · verified · 1 Parent(s): 4a7bfda

working on post prob

Files changed (1): app.py +89 -43
app.py CHANGED
@@ -8,9 +8,6 @@ import torch
 from ast import literal_eval
 from PIL import Image
 
-import logging
-logging.basicConfig(level=logging.INFO)
-
 # Load the model on the available device(s)
 model = Qwen2VLForConditionalGeneration.from_pretrained(
     "Qwen/Qwen2-VL-7B-Instruct", torch_dtype="auto", device_map="auto"
@@ -46,8 +43,18 @@ tax_deductions = '''Extract the following information in the given format:
 }
 '''
 
-def demo(image_path, prompt):
-    messages = [
+
+
+
+def process_document(image):
+    # Save the uploaded image to a temporary file
+    with tempfile.NamedTemporaryFile(delete=False, suffix=".jpg") as tmp_file:
+        image = Image.fromarray(image)  # Convert NumPy array to PIL Image
+        image.save(tmp_file.name)  # Save the image to the temporary file
+        image_path = tmp_file.name  # Get the path of the saved file
+
+
+    messages = [
         {
             "role": "user",
             "content": [
@@ -55,18 +62,25 @@ def demo(image_path, prompt):
                     "type": "image",
                     "image": image_path,  # Use the file path here
                 },
-                {"type": "text", "text": prompt},
+                {"type": "text", "text": '''Extract the following information in the given format:
+                    {
+                    'tax_deductions': {
+                        'federal:': {
+                        'withholding tax:': {'Amount':'', 'Year-To_Date':""},
+                        'ee social security tax:': {'Amount':'', 'Year-To_Date':""},
+                        'ee medicare tax:': {'Amount':'', 'Year-To_Date':""}},
+                    'california:': {
+                        'withholding tax:': {'Amount':'', 'Year-To_Date':""},
+                        'ee disability tax:': {'Amount':'', 'Year-To-Date':""}}},
+                    }
+                    '''},
             ],
         }
     ]
-    logging.info("Step 1: Preparing inference")
-    # Preparation for inference
     text = processor.apply_chat_template(
         messages, tokenize=False, add_generation_prompt=True
     )
-    logging.info("2")
     image_inputs, video_inputs = process_vision_info(messages)
-    logging.info("3")
     inputs = processor(
         text=[text],
         images=image_inputs,
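Note that both prompts show the target structure with single-quoted keys, i.e. a Python dict literal rather than strict JSON, which is presumably why the parsing further down relies on `ast.literal_eval` instead of `json.loads`. A quick illustration with a made-up reply:

```python
import json
from ast import literal_eval

reply = "{'tax_deductions': {'federal:': {'withholding tax:': {'Amount': '12.34'}}}}"
print(literal_eval(reply))  # parses fine: a valid Python dict literal

try:
    json.loads(reply)       # fails: JSON requires double-quoted strings
except json.JSONDecodeError as err:
    print("not valid JSON:", err)
```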
@@ -74,60 +88,92 @@ def demo(image_path, prompt):
         padding=True,
         return_tensors="pt",
     )
-    logging.info("4")
     inputs = inputs.to("cuda")
-    logging.info("5")
     # Inference: Generation of the output
     generated_ids = model.generate(**inputs, max_new_tokens=1500)
-    logging.info("6")
     generated_ids_trimmed = [
         out_ids[len(in_ids) :] for in_ids, out_ids in zip(inputs.input_ids, generated_ids)
     ]
-    logging.info("7")
     output_text = processor.batch_decode(
         generated_ids_trimmed, skip_special_tokens=True, clean_up_tokenization_spaces=False
     )
-    logging.info("8", output_text)
-
     # Handle output text to convert it into JSON
     try:
-        almost_json = output_text[0].split('\n')[-1].split('\n')[0]
+        # almost_json = output_text[0].replace('```\n', '').replace('\n```', '')
+        almost_json = output_text[0].split('```\n')[-1].split('\n```')[0]
+
         json = literal_eval(almost_json)
     except:
-        json = output_text[0]  # Return raw output if JSON parsing fails
-    return json
-
-def process_document(image):
-    # Log the function start
-    logging.info("Started processing the document")
-
-    # Save the uploaded image to a temporary file
-    with tempfile.NamedTemporaryFile(delete=False, suffix=".jpg") as tmp_file:
-        image = Image.fromarray(image)  # Convert NumPy array to PIL Image
-        image.save(tmp_file.name)  # Save the image to the temporary file
-        image_path = tmp_file.name  # Get the path of the saved file
-
-    logging.info("The path made for image: %s", image_path)
+        try:
+            # almost_json = output_text[0].replace('```json\n', '').replace('\n```', '')
+            almost_json = output_text[0].split('```json\n')[-1].split('\n```')[0]
+            json = literal_eval(almost_json)
+        except:
+            json = output_text[0]
+
+    messages = [
+        {
+            "role": "user",
+            "content": [
+                {
+                    "type": "image",
+                    "image": image_path,  # Use the file path here
+                },
+                {"type": "text", "text": '''Extract the following information in the given format:
+                    {'other_benefits_and_information': {
+                        '401k eru: {'This Period':'', 'Year-to-Date':''}},
+                    'quota summary':
+                    {
+                        'sick:': '',
+                        'vacation:': '',
+                    }
+                    'payment method': 'eg. Direct payment',
+                    'Amount': 'eg. 12.99'
+                    }
+                    '''},
+            ],
+        }
+    ]
+    text = processor.apply_chat_template(
+        messages, tokenize=False, add_generation_prompt=True
+    )
+    image_inputs, video_inputs = process_vision_info(messages)
+    inputs = processor(
+        text=[text],
+        images=image_inputs,
+        videos=video_inputs,
+        padding=True,
+        return_tensors="pt",
+    )
+    inputs = inputs.to("cuda")
+    # Inference: Generation of the output
+    generated_ids = model.generate(**inputs, max_new_tokens=1500)
+    generated_ids_trimmed = [
+        out_ids[len(in_ids) :] for in_ids, out_ids in zip(inputs.input_ids, generated_ids)
+    ]
+    output_text = processor.batch_decode(
+        generated_ids_trimmed, skip_special_tokens=True, clean_up_tokenization_spaces=False
+    )
+    try:
+        almost_json_2 = output_text[0].split('```\n')[-1].split('\n```')[0]
 
-    # Process the image with your model
-    one = demo(image_path, other_benifits)
-    logging.info("Processed the image for other benefits")
-
-    two = demo(image_path, tax_deductions)
-    logging.info("Processed the image for tax deductions")
+        json_2 = literal_eval(almost_json_2)
+    except:
+        try:
+            almost_json_2 = output_text[0].split('```json\n')[-1].split('\n```')[0]
+            json_2 = literal_eval(almost_json_2)
+        except:
+            json_2 = output_text[0]
 
     json_op = {
-        "tax_deductions": one,
-        "other_benifits": two
+        "tax_deductions": json,
+        "other_benifits": json_2
     }
-
-    # Optionally, delete the temporary file after use
+    # Optionally, you can delete the temporary file after use
     os.remove(image_path)
-    logging.info("Temporary file deleted: %s", image_path)
 
     return json_op
 
-
 # Create Gradio interface
 demo = gr.Interface(
     fn=process_document,
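The reworked parsing handles replies that arrive wrapped in Markdown code fences, which the old take-the-last-line split could not. The new chain keeps whatever sits between the opening and closing fence, trying a bare ``` opener first and falling back to a ```json opener. A standalone check with a made-up fenced reply:

```python
from ast import literal_eval

fenced = "```json\n{'Amount': '12.99'}\n```"

# First attempt assumes a bare ``` fence. Here the opener is ```json, so this
# only strips the closing fence; literal_eval on the result would raise.
first = fenced.split('```\n')[-1].split('\n```')[0]

# The except-branch then tries the ```json opener, which matches this reply.
inner = fenced.split('```json\n')[-1].split('\n```')[0]
print(inner)                # {'Amount': '12.99'}
print(literal_eval(inner))  # parsed into a real dict
```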
 
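After this change, `process_document` inlines the chat-template, vision-info, generate, and decode steps twice, once per prompt (the old `demo()` helper is gone; its name also collided with the `gr.Interface` object called `demo` below, so calling it at request time would have invoked the interface instead of the function). A minimal deduplication sketch reusing the file's own `processor`, `model`, and `process_vision_info`; the helper name `run_extraction` is hypothetical:

```python
def run_extraction(image_path: str, prompt: str) -> str:
    # One round trip: build the chat, run generation, decode only the new tokens.
    messages = [{
        "role": "user",
        "content": [
            {"type": "image", "image": image_path},
            {"type": "text", "text": prompt},
        ],
    }]
    text = processor.apply_chat_template(
        messages, tokenize=False, add_generation_prompt=True
    )
    image_inputs, video_inputs = process_vision_info(messages)
    inputs = processor(
        text=[text], images=image_inputs, videos=video_inputs,
        padding=True, return_tensors="pt",
    ).to("cuda")
    generated_ids = model.generate(**inputs, max_new_tokens=1500)
    trimmed = [
        out[len(inp):] for inp, out in zip(inputs.input_ids, generated_ids)
    ]
    return processor.batch_decode(
        trimmed, skip_special_tokens=True, clean_up_tokenization_spaces=False
    )[0]
```

Each prompt then becomes one call, e.g. `run_extraction(image_path, tax_deductions)`, and the fence-stripping fallback can live in a second small helper shared by both passes.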