drlon committed on
Commit
c6ba3e6
·
1 Parent(s): a3882be

get @torch.autocast(device_type=cuda, dtype=torch.bfloat16)

Browse files
Files changed (1) hide show
  1. app.py +14 -13
app.py CHANGED
@@ -1,5 +1,5 @@
1
  import traceback
2
- import logging
3
  from typing import Optional
4
  import spaces
5
  import gradio as gr
@@ -22,8 +22,7 @@ import torch
22
  from transformers import AutoModelForCausalLM
23
  from transformers import AutoProcessor
24
 
25
- logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
26
- logger = logging.getLogger(__name__)
27
 
28
  # Define repository and local directory
29
  repo_id = "microsoft/OmniParser-v2.0" # HF repo
@@ -40,7 +39,7 @@ magam_model.to("cuda")
40
  # Download the entire repository
41
  snapshot_download(repo_id=repo_id, local_dir=local_dir)
42
 
43
- print(f"Repository downloaded to: {local_dir}")
44
 
45
 
46
  yolo_model = get_yolo_model(model_path='weights/icon_detect/model.pt')
@@ -79,6 +78,7 @@ DEVICE = torch.device('cuda')
79
 
80
  @spaces.GPU
81
  @torch.inference_mode()
 
82
  def get_som_response(instruction, image_som):
83
  prompt = magma_som_prompt.format(instruction)
84
  if magam_model.config.mm_use_image_start_end:
@@ -95,10 +95,9 @@ def get_som_response(instruction, image_som):
95
 
96
  inputs = magma_processor(images=[image_som], texts=prompt, return_tensors="pt")
97
  inputs['pixel_values'] = inputs['pixel_values'].unsqueeze(0)
98
- # inputs['pixel_values'] = inputs['pixel_values'].to(torch.bfloat16)
99
  inputs['image_sizes'] = inputs['image_sizes'].unsqueeze(0)
100
- inputs = inputs.to("cuda")
101
-
102
 
103
  magam_model.generation_config.pad_token_id = magma_processor.tokenizer.pad_token_id
104
  with torch.inference_mode():
@@ -118,6 +117,7 @@ def get_som_response(instruction, image_som):
118
 
119
  @spaces.GPU
120
  @torch.inference_mode()
 
121
  def get_qa_response(instruction, image):
122
  prompt = magma_qa_prompt.format(instruction)
123
  if magam_model.config.mm_use_image_start_end:
@@ -135,7 +135,8 @@ def get_qa_response(instruction, image):
135
  inputs = magma_processor(images=[image], texts=prompt, return_tensors="pt")
136
  inputs['pixel_values'] = inputs['pixel_values'].unsqueeze(0)
137
  inputs['image_sizes'] = inputs['image_sizes'].unsqueeze(0)
138
- inputs = inputs.to("cuda")
 
139
 
140
  magam_model.generation_config.pad_token_id = magma_processor.tokenizer.pad_token_id
141
  with torch.inference_mode():
@@ -165,7 +166,7 @@ def process(
165
  instruction,
166
  ) -> Optional[Image.Image]:
167
 
168
- logger.info("Starting processing.")
169
  try:
170
  # image_save_path = 'imgs/saved_image_demo.png'
171
  # image_input.save(image_save_path)
@@ -184,7 +185,7 @@ def process(
184
  parsed_content_list = '\n'.join([f'icon {i}: ' + str(v) for i,v in enumerate(parsed_content_list)])
185
 
186
  if len(instruction) == 0:
187
- print('finish processing')
188
  image = Image.open(io.BytesIO(base64.b64decode(dino_labled_img)))
189
  return image, str(parsed_content_list)
190
 
@@ -210,7 +211,7 @@ def process(
210
  label_coordinates[key] = [val[0] / image_input.size[0], val[1] / image_input.size[1], val[2] / image_input.size[0], val[3] / image_input.size[1]]
211
 
212
  magma_response = get_som_response(instruction, image_som)
213
- print("magma repsonse: ", magma_response)
214
 
215
  # map magma_response into the mark id
216
  mark_id = extract_mark_id(magma_response)
@@ -258,14 +259,14 @@ def process(
258
  except:
259
  image_som = image_input
260
 
261
- logger.info("finish processing")
262
  return image_som, str(parsed_content_list)
263
  except Exception as e:
264
  error_message = traceback.format_exc()
265
  logger.warning(error_message)
266
  return image_input, error_message
267
 
268
- logger.info("Starting App.")
269
  with gr.Blocks() as demo:
270
  gr.Markdown(MARKDOWN)
271
  with gr.Row():
 
1
  import traceback
2
+ from logging import getLogger
3
  from typing import Optional
4
  import spaces
5
  import gradio as gr
 
22
  from transformers import AutoModelForCausalLM
23
  from transformers import AutoProcessor
24
 
25
+ logger = getLogger(__name__)
 
26
 
27
  # Define repository and local directory
28
  repo_id = "microsoft/OmniParser-v2.0" # HF repo
 
39
  # Download the entire repository
40
  snapshot_download(repo_id=repo_id, local_dir=local_dir)
41
 
42
+ logger.warning(f"Repository downloaded to: {local_dir}")
43
 
44
 
45
  yolo_model = get_yolo_model(model_path='weights/icon_detect/model.pt')
 
78
 
79
  @spaces.GPU
80
  @torch.inference_mode()
81
+ @torch.autocast(device_type="cuda", dtype=torch.bfloat16)
82
  def get_som_response(instruction, image_som):
83
  prompt = magma_som_prompt.format(instruction)
84
  if magam_model.config.mm_use_image_start_end:
 
95
 
96
  inputs = magma_processor(images=[image_som], texts=prompt, return_tensors="pt")
97
  inputs['pixel_values'] = inputs['pixel_values'].unsqueeze(0)
 
98
  inputs['image_sizes'] = inputs['image_sizes'].unsqueeze(0)
99
+ # inputs = inputs.to("cuda")
100
+ inputs = inputs.to("cuda", dtype=torch.bfloat16)
101
 
102
  magam_model.generation_config.pad_token_id = magma_processor.tokenizer.pad_token_id
103
  with torch.inference_mode():
 
117
 
118
  @spaces.GPU
119
  @torch.inference_mode()
120
+ @torch.autocast(device_type="cuda", dtype=torch.bfloat16)
121
  def get_qa_response(instruction, image):
122
  prompt = magma_qa_prompt.format(instruction)
123
  if magam_model.config.mm_use_image_start_end:
 
135
  inputs = magma_processor(images=[image], texts=prompt, return_tensors="pt")
136
  inputs['pixel_values'] = inputs['pixel_values'].unsqueeze(0)
137
  inputs['image_sizes'] = inputs['image_sizes'].unsqueeze(0)
138
+ # inputs = inputs.to("cuda")
139
+ inputs = inputs.to("cuda", dtype=torch.bfloat16)
140
 
141
  magam_model.generation_config.pad_token_id = magma_processor.tokenizer.pad_token_id
142
  with torch.inference_mode():
 
166
  instruction,
167
  ) -> Optional[Image.Image]:
168
 
169
+ logger.warning("Starting processing.")
170
  try:
171
  # image_save_path = 'imgs/saved_image_demo.png'
172
  # image_input.save(image_save_path)
 
185
  parsed_content_list = '\n'.join([f'icon {i}: ' + str(v) for i,v in enumerate(parsed_content_list)])
186
 
187
  if len(instruction) == 0:
188
+ logger.warning('finish processing')
189
  image = Image.open(io.BytesIO(base64.b64decode(dino_labled_img)))
190
  return image, str(parsed_content_list)
191
 
 
211
  label_coordinates[key] = [val[0] / image_input.size[0], val[1] / image_input.size[1], val[2] / image_input.size[0], val[3] / image_input.size[1]]
212
 
213
  magma_response = get_som_response(instruction, image_som)
214
+ logger.warning("magma repsonse: ", magma_response)
215
 
216
  # map magma_response into the mark id
217
  mark_id = extract_mark_id(magma_response)
 
259
  except:
260
  image_som = image_input
261
 
262
+ logger.warning("finish processing")
263
  return image_som, str(parsed_content_list)
264
  except Exception as e:
265
  error_message = traceback.format_exc()
266
  logger.warning(error_message)
267
  return image_input, error_message
268
 
269
+ logger.warning("Starting App.")
270
  with gr.Blocks() as demo:
271
  gr.Markdown(MARKDOWN)
272
  with gr.Row():