Spaces: Running on Zero

Commit: get @torch.autocast(device_type=cuda, dtype=torch.bfloat16)

app.py CHANGED
@@ -1,5 +1,5 @@
 import traceback
-import logging
+from logging import getLogger
 from typing import Optional
 import spaces
 import gradio as gr
@@ -22,8 +22,7 @@ import torch
 from transformers import AutoModelForCausalLM
 from transformers import AutoProcessor
 
-
-logger = logging.getLogger(__name__)
+logger = getLogger(__name__)
 
 # Define repository and local directory
 repo_id = "microsoft/OmniParser-v2.0" # HF repo
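The switch to a module-level logger relies on Python's default logging behavior; a minimal sketch (not from the commit, messages illustrative) of why `logger.warning()` is visible without any `basicConfig` call:

```python
# Minimal sketch: with no handlers configured, Python's "handler of last
# resort" (logging.lastResort) prints WARNING and above to stderr, while
# INFO and DEBUG records are dropped. This is presumably why the Space
# logs everything at warning level.
from logging import getLogger

logger = getLogger(__name__)

logger.info("not shown: below the default WARNING threshold")
logger.warning("shown on stderr via logging.lastResort")
```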
@@ -40,7 +39,7 @@ magam_model.to("cuda")
 # Download the entire repository
 snapshot_download(repo_id=repo_id, local_dir=local_dir)
 
-
+logger.warning(f"Repository downloaded to: {local_dir}")
 
 
 yolo_model = get_yolo_model(model_path='weights/icon_detect/model.pt')
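For context, `snapshot_download` also returns the path the snapshot was written to, so the logged directory could equally be taken from the return value; a hedged sketch (the `"weights"` value for `local_dir` is an assumption, inferred from the model paths in app.py):

```python
# Hedged sketch, not the commit's code: snapshot_download returns the
# directory the files landed in, which is handy to log. "weights" is an
# assumed value for local_dir, inferred from paths like
# 'weights/icon_detect/model.pt' used elsewhere in app.py.
from huggingface_hub import snapshot_download

path = snapshot_download(repo_id="microsoft/OmniParser-v2.0", local_dir="weights")
print(f"Repository downloaded to: {path}")
```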
@@ -79,6 +78,7 @@ DEVICE = torch.device('cuda')
 
 @spaces.GPU
 @torch.inference_mode()
+@torch.autocast(device_type="cuda", dtype=torch.bfloat16)
 def get_som_response(instruction, image_som):
     prompt = magma_som_prompt.format(instruction)
     if magam_model.config.mm_use_image_start_end:
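`torch.autocast` instances work both as context managers and as decorators, so stacking it under `@torch.inference_mode()` wraps the whole function body in a bfloat16 autocast region; a small equivalence sketch (`model` and `x` are placeholders):

```python
# Sketch: the decorator form added above is equivalent to wrapping the
# function body in a torch.autocast context manager. Inside the region,
# autocast-eligible ops (matmuls, convolutions) run in bfloat16.
import torch

@torch.autocast(device_type="cuda", dtype=torch.bfloat16)
def forward_decorated(model, x):
    return model(x)

def forward_with_context(model, x):
    with torch.autocast(device_type="cuda", dtype=torch.bfloat16):
        return model(x)
```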
@@ -95,10 +95,9 @@ def get_som_response(instruction, image_som):
 
     inputs = magma_processor(images=[image_som], texts=prompt, return_tensors="pt")
     inputs['pixel_values'] = inputs['pixel_values'].unsqueeze(0)
-    # inputs['pixel_values'] = inputs['pixel_values'].to(torch.bfloat16)
     inputs['image_sizes'] = inputs['image_sizes'].unsqueeze(0)
-    inputs = inputs.to("cuda")
-
+    # inputs = inputs.to("cuda")
+    inputs = inputs.to("cuda", dtype=torch.bfloat16)
 
     magam_model.generation_config.pad_token_id = magma_processor.tokenizer.pad_token_id
     with torch.inference_mode():
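The single `inputs.to("cuda", dtype=torch.bfloat16)` call works because, assuming `magma_processor` returns a transformers `BatchFeature`, its `.to()` casts only floating-point tensors; a hedged sketch of that behavior:

```python
# Hedged sketch, assuming the processor output is a transformers
# BatchFeature: .to() casts floating-point tensors (pixel_values) to the
# requested dtype but only moves integer tensors (input_ids) to the
# device, so token ids stay usable for embedding lookups.
import torch
from transformers.feature_extraction_utils import BatchFeature

batch = BatchFeature({
    "input_ids": torch.tensor([[1, 2, 3]]),        # int64
    "pixel_values": torch.randn(1, 3, 224, 224),   # float32
})
batch = batch.to("cuda", dtype=torch.bfloat16)
print(batch["input_ids"].dtype)     # torch.int64 (moved, not cast)
print(batch["pixel_values"].dtype)  # torch.bfloat16
```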
@@ -118,6 +117,7 @@ def get_som_response(instruction, image_som):
 
 @spaces.GPU
 @torch.inference_mode()
+@torch.autocast(device_type="cuda", dtype=torch.bfloat16)
 def get_qa_response(instruction, image):
     prompt = magma_qa_prompt.format(instruction)
     if magam_model.config.mm_use_image_start_end:
@@ -135,7 +135,8 @@ def get_qa_response(instruction, image):
     inputs = magma_processor(images=[image], texts=prompt, return_tensors="pt")
     inputs['pixel_values'] = inputs['pixel_values'].unsqueeze(0)
     inputs['image_sizes'] = inputs['image_sizes'].unsqueeze(0)
-    inputs = inputs.to("cuda")
+    # inputs = inputs.to("cuda")
+    inputs = inputs.to("cuda", dtype=torch.bfloat16)
 
     magam_model.generation_config.pad_token_id = magma_processor.tokenizer.pad_token_id
     with torch.inference_mode():
@@ -165,7 +166,7 @@ def process(
     instruction,
 ) -> Optional[Image.Image]:
 
-    logger.
+    logger.warning("Starting processing.")
     try:
         # image_save_path = 'imgs/saved_image_demo.png'
         # image_input.save(image_save_path)
@@ -184,7 +185,7 @@ def process(
         parsed_content_list = '\n'.join([f'icon {i}: ' + str(v) for i,v in enumerate(parsed_content_list)])
 
         if len(instruction) == 0:
-
+            logger.warning('finish processing')
             image = Image.open(io.BytesIO(base64.b64decode(dino_labled_img)))
             return image, str(parsed_content_list)
 
@@ -210,7 +211,7 @@ def process(
             label_coordinates[key] = [val[0] / image_input.size[0], val[1] / image_input.size[1], val[2] / image_input.size[0], val[3] / image_input.size[1]]
 
         magma_response = get_som_response(instruction, image_som)
-
+        logger.warning("magma response: %s", magma_response)
 
         # map magma_response into the mark id
         mark_id = extract_mark_id(magma_response)
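The response is passed to the logger as a separate argument, which relies on logging's lazy %-style interpolation; a minimal sketch of the difference (messages illustrative):

```python
# Minimal sketch: logging interpolates extra arguments into %-style
# placeholders at emit time. Without a placeholder, the extra argument
# is never appended; logging instead reports a formatting error.
from logging import getLogger

logger = getLogger(__name__)

logger.warning("magma response: %s", "Mark 3")  # OK: lazy interpolation
logger.warning("magma response: ", "Mark 3")    # no %s: "--- Logging error ---"
```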
@@ -258,14 +259,14 @@ def process(
         except:
             image_som = image_input
 
-        logger.
+        logger.warning("finish processing")
         return image_som, str(parsed_content_list)
     except Exception as e:
         error_message = traceback.format_exc()
         logger.warning(error_message)
         return image_input, error_message
 
-logger.
+logger.warning("Starting App.")
 with gr.Blocks() as demo:
     gr.Markdown(MARKDOWN)
     with gr.Row():