Spaces:
Running
on
CPU Upgrade
Running
on
CPU Upgrade
update space description
Browse files
README.md
CHANGED
@@ -1,6 +1,6 @@
|
|
1 |
---
|
2 |
-
title:
|
3 |
-
emoji:
|
4 |
colorFrom: blue
|
5 |
colorTo: green
|
6 |
sdk: gradio
|
|
|
1 |
---
|
2 |
+
title: Fast quantized SmolVLM2
|
3 |
+
emoji: ⚡
|
4 |
colorFrom: blue
|
5 |
colorTo: green
|
6 |
sdk: gradio
|
app.py
CHANGED
@@ -162,7 +162,8 @@ def process_history(history: list[dict]) -> list[dict]:
|
|
162 |
|
163 |
|
164 |
@torch.inference_mode()
|
165 |
-
def generate(message: dict, history: list[dict], model_id: str,
|
|
|
166 |
|
167 |
update_model(model_id)
|
168 |
processor = model_cache["processor"]
|
@@ -215,8 +216,8 @@ def generate(message: dict, history: list[dict], model_id: str, system_prompt: s
|
|
215 |
examples = [
|
216 |
[
|
217 |
{
|
218 |
-
"text": "What is the
|
219 |
-
"files": [],
|
220 |
}
|
221 |
],
|
222 |
[
|
@@ -246,23 +247,17 @@ examples = [
|
|
246 |
],
|
247 |
[
|
248 |
{
|
249 |
-
"text": "What is
|
250 |
-
"files": [
|
251 |
}
|
252 |
],
|
253 |
]
|
254 |
|
255 |
|
256 |
model_choices = [
|
257 |
-
# "echarlaix/SmolVLM2-2.2B-Instruct-openvino",
|
258 |
-
# "echarlaix/SmolVLM-256M-Instruct-openvino",
|
259 |
"echarlaix/SmolVLM2-500M-Video-Instruct-openvino",
|
260 |
-
# "echarlaix/SmolVLM2-256M-Video-Instruct-openvino",
|
261 |
-
# "echarlaix/SmolVLM2-256M-Video-Instruct-openvino-8bit-woq-data-free",
|
262 |
-
# "echarlaix/SmolVLM2-256M-Video-Instruct-openvino-8bit-static",
|
263 |
"echarlaix/SmolVLM2-500M-Video-Instruct-openvino-8bit-static",
|
264 |
"echarlaix/SmolVLM2-500M-Video-Instruct-openvino-8bit-woq-data-free",
|
265 |
-
# "echarlaix/SmolVLM2-2.2B-Instruct-openvino-8bit-static",
|
266 |
]
|
267 |
|
268 |
demo = gr.ChatInterface(
|
@@ -276,12 +271,12 @@ demo = gr.ChatInterface(
|
|
276 |
multimodal=True,
|
277 |
additional_inputs=[
|
278 |
gr.Dropdown(model_choices, value=model_choices[0], label="Model ID"),
|
279 |
-
gr.Textbox(label="System Prompt", value="You are a helpful assistant."),
|
280 |
gr.Slider(label="Max New Tokens", minimum=100, maximum=2000, step=10, value=700),
|
281 |
],
|
282 |
stop_btn=False,
|
283 |
title="Fast quantized SmolVLM2 ⚡",
|
284 |
-
description="Play with a [SmolVLM2-500M-Video-Instruct](https://huggingface.co/echarlaix/SmolVLM2-500M-Video-Instruct-openvino) and its quantized
|
285 |
examples=examples,
|
286 |
run_examples_on_click=False,
|
287 |
cache_examples=False,
|
|
|
162 |
|
163 |
|
164 |
@torch.inference_mode()
|
165 |
+
def generate(message: dict, history: list[dict], model_id: str, max_new_tokens: int = 512) -> Iterator[str]:
|
166 |
+
system_prompt = "You are a helpful assistant."
|
167 |
|
168 |
update_model(model_id)
|
169 |
processor = model_cache["processor"]
|
|
|
216 |
examples = [
|
217 |
[
|
218 |
{
|
219 |
+
"text": "What is on the flower?",
|
220 |
+
"files": ["assets/bee.jpg"],
|
221 |
}
|
222 |
],
|
223 |
[
|
|
|
247 |
],
|
248 |
[
|
249 |
{
|
250 |
+
"text": "What is the capital of France?",
|
251 |
+
"files": [],
|
252 |
}
|
253 |
],
|
254 |
]
|
255 |
|
256 |
|
257 |
model_choices = [
|
|
|
|
|
258 |
"echarlaix/SmolVLM2-500M-Video-Instruct-openvino",
|
|
|
|
|
|
|
259 |
"echarlaix/SmolVLM2-500M-Video-Instruct-openvino-8bit-static",
|
260 |
"echarlaix/SmolVLM2-500M-Video-Instruct-openvino-8bit-woq-data-free",
|
|
|
261 |
]
|
262 |
|
263 |
demo = gr.ChatInterface(
|
|
|
271 |
multimodal=True,
|
272 |
additional_inputs=[
|
273 |
gr.Dropdown(model_choices, value=model_choices[0], label="Model ID"),
|
274 |
+
# gr.Textbox(label="System Prompt", value="You are a helpful assistant."),
|
275 |
gr.Slider(label="Max New Tokens", minimum=100, maximum=2000, step=10, value=700),
|
276 |
],
|
277 |
stop_btn=False,
|
278 |
title="Fast quantized SmolVLM2 ⚡",
|
279 |
+
description="Play with a [SmolVLM2-500M-Video-Instruct](https://huggingface.co/echarlaix/SmolVLM2-500M-Video-Instruct-openvino) and its quantized variants : [SmolVLM2-500M-Video-Instruct-openvino-8bit-woq-data-free](https://huggingface.co/echarlaix/SmolVLM2-500M-Video-Instruct-openvino-8bit-woq-data-free) and [SmolVLM2-500M-Video-Instruct-openvino-8bit-static](https://huggingface.co/echarlaix/SmolVLM2-500M-Video-Instruct-openvino-8bit-static) both obtained by respectively applying Weight-Only Quantization and Static Quantization using [Optimum Intel](https://github.com/huggingface/optimum-intel) NNCF integration. To get started, upload an image and text or try one of the examples. This demo runs on 4th Generation Intel Xeon (Sapphire Rapids) processors.",
|
280 |
examples=examples,
|
281 |
run_examples_on_click=False,
|
282 |
cache_examples=False,
|