Spaces:
Paused
Paused
add vllm
Browse files
app.py
CHANGED
@@ -8,21 +8,17 @@ import requests
|
|
8 |
from huggingface_hub import login
|
9 |
import torch
|
10 |
import torch.nn.functional as F
|
11 |
-
import spaces
|
12 |
import json
|
13 |
-
import gradio as gr
|
14 |
from huggingface_hub import snapshot_download
|
15 |
-
import
|
16 |
-
# from loadimg import load_img
|
17 |
-
import traceback
|
18 |
|
19 |
login(os.environ.get("HUGGINGFACE_TOKEN"))
|
|
|
20 |
|
21 |
repo_id = "mistralai/Pixtral-12B-2409"
|
22 |
-
|
23 |
-
|
24 |
-
max_img_per_msg = 5
|
25 |
-
|
26 |
|
27 |
title = "# **WIP / DEMO** 🙋🏻♂️Welcome to Tonic's Pixtral Model Demo"
|
28 |
description = """
|
@@ -40,9 +36,15 @@ with open(f'{model_path}/tekken.json', 'r') as f:
|
|
40 |
|
41 |
model_name = "mistralai/Pixtral-12B-2409"
|
42 |
|
43 |
-
|
|
|
|
|
|
|
|
|
|
|
44 |
|
45 |
-
|
|
|
46 |
|
47 |
def encode_image(image: Image.Image, image_format="PNG") -> str:
|
48 |
im_file = BytesIO()
|
@@ -51,11 +53,13 @@ def encode_image(image: Image.Image, image_format="PNG") -> str:
|
|
51 |
im_64 = base64.b64encode(im_bytes).decode("utf-8")
|
52 |
return im_64
|
53 |
|
54 |
-
def infer(image_url, prompt, progress=gr.Progress(track_tqdm=True)):
|
55 |
if llm is None:
|
56 |
return "Error: LLM initialization failed. Please try again later."
|
57 |
|
58 |
try:
|
|
|
|
|
59 |
image = Image.open(BytesIO(requests.get(image_url).content))
|
60 |
image = image.resize((3844, 2408))
|
61 |
new_image_url = f"data:image/png;base64,{encode_image(image, image_format='PNG')}"
|
@@ -68,16 +72,19 @@ def infer(image_url, prompt, progress=gr.Progress(track_tqdm=True)):
|
|
68 |
]
|
69 |
|
70 |
outputs = llm.chat(messages, sampling_params=sampling_params)
|
71 |
-
|
72 |
return outputs[0].outputs[0].text
|
73 |
except Exception as e:
|
|
|
74 |
return f"Error during inference: {e}"
|
75 |
|
76 |
-
def compare_images(image1_url, image2_url, prompt, progress=gr.Progress(track_tqdm=True)):
|
77 |
if llm is None:
|
78 |
return "Error: LLM initialization failed. Please try again later."
|
79 |
|
80 |
try:
|
|
|
|
|
81 |
image1 = Image.open(BytesIO(requests.get(image1_url).content))
|
82 |
image2 = Image.open(BytesIO(requests.get(image2_url).content))
|
83 |
image1 = image1.resize((3844, 2408))
|
@@ -97,9 +104,10 @@ def compare_images(image1_url, image2_url, prompt, progress=gr.Progress(track_tq
|
|
97 |
]
|
98 |
|
99 |
outputs = llm.chat(messages, sampling_params=sampling_params)
|
100 |
-
|
101 |
return outputs[0].outputs[0].text
|
102 |
except Exception as e:
|
|
|
103 |
return f"Error during image comparison: {e}"
|
104 |
|
105 |
def calculate_image_similarity(image1_url, image2_url):
|
@@ -120,9 +128,10 @@ def calculate_image_similarity(image1_url, image2_url):
|
|
120 |
embedding2 = llm.model.vision_encoder([image2_tensor])
|
121 |
|
122 |
similarity = F.cosine_similarity(embedding1.mean(dim=0), embedding2.mean(dim=0), dim=0).item()
|
123 |
-
|
124 |
return similarity
|
125 |
except Exception as e:
|
|
|
126 |
return f"Error during image similarity calculation: {e}"
|
127 |
|
128 |
with gr.Blocks() as demo:
|
@@ -137,10 +146,12 @@ with gr.Blocks() as demo:
|
|
137 |
1. For Image-to-Text Generation:
|
138 |
- Enter the URL of an image
|
139 |
- Provide a prompt describing what you want to know about the image
|
|
|
140 |
- Click "Generate" to get the model's response
|
141 |
2. For Image Comparison:
|
142 |
- Enter URLs for two images you want to compare
|
143 |
- Provide a prompt asking about the comparison
|
|
|
144 |
- Click "Compare" to get the model's analysis
|
145 |
3. For Image Similarity:
|
146 |
- Enter URLs for two images you want to compare
|
@@ -153,20 +164,26 @@ with gr.Blocks() as demo:
|
|
153 |
with gr.Row():
|
154 |
image_url = gr.Text(label="Image URL")
|
155 |
prompt = gr.Text(label="Prompt")
|
|
|
|
|
|
|
156 |
generate_button = gr.Button("Generate")
|
157 |
output = gr.Text(label="Generated Text")
|
158 |
|
159 |
-
generate_button.click(infer, inputs=[image_url, prompt], outputs=output)
|
160 |
|
161 |
with gr.TabItem("Image Comparison"):
|
162 |
with gr.Row():
|
163 |
image1_url = gr.Text(label="Image 1 URL")
|
164 |
image2_url = gr.Text(label="Image 2 URL")
|
165 |
comparison_prompt = gr.Text(label="Comparison Prompt")
|
|
|
|
|
|
|
166 |
compare_button = gr.Button("Compare")
|
167 |
comparison_output = gr.Text(label="Comparison Result")
|
168 |
|
169 |
-
compare_button.click(compare_images, inputs=[image1_url, image2_url, comparison_prompt], outputs=comparison_output)
|
170 |
|
171 |
with gr.TabItem("Image Similarity"):
|
172 |
with gr.Row():
|
@@ -187,4 +204,4 @@ with gr.Blocks() as demo:
|
|
187 |
gr.Markdown(f"- Patch Size: {params['vision_encoder']['patch_size']}x{params['vision_encoder']['patch_size']}")
|
188 |
|
189 |
if __name__ == "__main__":
|
190 |
-
demo.launch()
|
|
|
8 |
from huggingface_hub import login
|
9 |
import torch
|
10 |
import torch.nn.functional as F
|
11 |
+
# import spaces
|
12 |
import json
|
|
|
13 |
from huggingface_hub import snapshot_download
|
14 |
+
# import traceback
|
|
|
|
|
15 |
|
16 |
login(os.environ.get("HUGGINGFACE_TOKEN"))
|
17 |
+
os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "max_split_size_mb:80"
|
18 |
|
19 |
repo_id = "mistralai/Pixtral-12B-2409"
|
20 |
+
max_tokens_per_img = 2048
|
21 |
+
max_img_per_msg = 2
|
|
|
|
|
22 |
|
23 |
title = "# **WIP / DEMO** 🙋🏻♂️Welcome to Tonic's Pixtral Model Demo"
|
24 |
description = """
|
|
|
36 |
|
37 |
model_name = "mistralai/Pixtral-12B-2409"
|
38 |
|
39 |
+
llm = LLM(
|
40 |
+
model=model_name,
|
41 |
+
tokenizer_mode="mistral",
|
42 |
+
max_num_batched_tokens=max_img_per_msg * max_tokens_per_img,
|
43 |
+
dtype="float16"
|
44 |
+
)
|
45 |
|
46 |
+
def clear_cuda_cache():
|
47 |
+
torch.cuda.empty_cache()
|
48 |
|
49 |
def encode_image(image: Image.Image, image_format="PNG") -> str:
|
50 |
im_file = BytesIO()
|
|
|
53 |
im_64 = base64.b64encode(im_bytes).decode("utf-8")
|
54 |
return im_64
|
55 |
|
56 |
+
def infer(image_url, prompt, temperature, max_tokens, progress=gr.Progress(track_tqdm=True)):
|
57 |
if llm is None:
|
58 |
return "Error: LLM initialization failed. Please try again later."
|
59 |
|
60 |
try:
|
61 |
+
sampling_params = SamplingParams(max_tokens=max_tokens, temperature=temperature)
|
62 |
+
|
63 |
image = Image.open(BytesIO(requests.get(image_url).content))
|
64 |
image = image.resize((3844, 2408))
|
65 |
new_image_url = f"data:image/png;base64,{encode_image(image, image_format='PNG')}"
|
|
|
72 |
]
|
73 |
|
74 |
outputs = llm.chat(messages, sampling_params=sampling_params)
|
75 |
+
clear_cuda_cache()
|
76 |
return outputs[0].outputs[0].text
|
77 |
except Exception as e:
|
78 |
+
clear_cuda_cache()
|
79 |
return f"Error during inference: {e}"
|
80 |
|
81 |
+
def compare_images(image1_url, image2_url, prompt, temperature, max_tokens, progress=gr.Progress(track_tqdm=True)):
|
82 |
if llm is None:
|
83 |
return "Error: LLM initialization failed. Please try again later."
|
84 |
|
85 |
try:
|
86 |
+
sampling_params = SamplingParams(max_tokens=max_tokens, temperature=temperature)
|
87 |
+
|
88 |
image1 = Image.open(BytesIO(requests.get(image1_url).content))
|
89 |
image2 = Image.open(BytesIO(requests.get(image2_url).content))
|
90 |
image1 = image1.resize((3844, 2408))
|
|
|
104 |
]
|
105 |
|
106 |
outputs = llm.chat(messages, sampling_params=sampling_params)
|
107 |
+
clear_cuda_cache()
|
108 |
return outputs[0].outputs[0].text
|
109 |
except Exception as e:
|
110 |
+
clear_cuda_cache()
|
111 |
return f"Error during image comparison: {e}"
|
112 |
|
113 |
def calculate_image_similarity(image1_url, image2_url):
|
|
|
128 |
embedding2 = llm.model.vision_encoder([image2_tensor])
|
129 |
|
130 |
similarity = F.cosine_similarity(embedding1.mean(dim=0), embedding2.mean(dim=0), dim=0).item()
|
131 |
+
clear_cuda_cache()
|
132 |
return similarity
|
133 |
except Exception as e:
|
134 |
+
clear_cuda_cache()
|
135 |
return f"Error during image similarity calculation: {e}"
|
136 |
|
137 |
with gr.Blocks() as demo:
|
|
|
146 |
1. For Image-to-Text Generation:
|
147 |
- Enter the URL of an image
|
148 |
- Provide a prompt describing what you want to know about the image
|
149 |
+
- Adjust the temperature and max tokens
|
150 |
- Click "Generate" to get the model's response
|
151 |
2. For Image Comparison:
|
152 |
- Enter URLs for two images you want to compare
|
153 |
- Provide a prompt asking about the comparison
|
154 |
+
- Adjust the temperature and max tokens
|
155 |
- Click "Compare" to get the model's analysis
|
156 |
3. For Image Similarity:
|
157 |
- Enter URLs for two images you want to compare
|
|
|
164 |
with gr.Row():
|
165 |
image_url = gr.Text(label="Image URL")
|
166 |
prompt = gr.Text(label="Prompt")
|
167 |
+
with gr.Row():
|
168 |
+
temperature = gr.Slider(minimum=0.1, maximum=2.0, value=0.7, label="Temperature")
|
169 |
+
max_tokens = gr.Number(value=4096, label="Max Tokens")
|
170 |
generate_button = gr.Button("Generate")
|
171 |
output = gr.Text(label="Generated Text")
|
172 |
|
173 |
+
generate_button.click(infer, inputs=[image_url, prompt, temperature, max_tokens], outputs=output)
|
174 |
|
175 |
with gr.TabItem("Image Comparison"):
|
176 |
with gr.Row():
|
177 |
image1_url = gr.Text(label="Image 1 URL")
|
178 |
image2_url = gr.Text(label="Image 2 URL")
|
179 |
comparison_prompt = gr.Text(label="Comparison Prompt")
|
180 |
+
with gr.Row():
|
181 |
+
comparison_temperature = gr.Slider(minimum=0.1, maximum=2.0, value=0.7, label="Temperature")
|
182 |
+
comparison_max_tokens = gr.Number(value=4096, label="Max Tokens")
|
183 |
compare_button = gr.Button("Compare")
|
184 |
comparison_output = gr.Text(label="Comparison Result")
|
185 |
|
186 |
+
compare_button.click(compare_images, inputs=[image1_url, image2_url, comparison_prompt, comparison_temperature, comparison_max_tokens], outputs=comparison_output)
|
187 |
|
188 |
with gr.TabItem("Image Similarity"):
|
189 |
with gr.Row():
|
|
|
204 |
gr.Markdown(f"- Patch Size: {params['vision_encoder']['patch_size']}x{params['vision_encoder']['patch_size']}")
|
205 |
|
206 |
if __name__ == "__main__":
|
207 |
+
demo.launch()
|