echarlaix (HF Staff) committed
Commit 23b0e0e · Parent(s): eae8f4b

update space description

Files changed (2)
  1. README.md +2 -2
  2. app.py +8 -13
README.md CHANGED
@@ -1,6 +1,6 @@
 ---
-title: Vision Langage Openvino
-emoji: 📊
+title: Fast quantized SmolVLM2
+emoji:
 colorFrom: blue
 colorTo: green
 sdk: gradio
app.py CHANGED
@@ -162,7 +162,8 @@ def process_history(history: list[dict]) -> list[dict]:
 
 
 @torch.inference_mode()
-def generate(message: dict, history: list[dict], model_id: str, system_prompt: str = "", max_new_tokens: int = 512) -> Iterator[str]:
+def generate(message: dict, history: list[dict], model_id: str, max_new_tokens: int = 512) -> Iterator[str]:
+    system_prompt = "You are a helpful assistant."
 
     update_model(model_id)
     processor = model_cache["processor"]
@@ -215,8 +216,8 @@ def generate(message: dict, history: list[dict], model_id: str, system_prompt: s
 examples = [
     [
         {
-            "text": "What is the capital of France?",
-            "files": [],
+            "text": "What is on the flower?",
+            "files": ["assets/bee.jpg"],
         }
     ],
     [
@@ -246,23 +247,17 @@ examples = [
     ],
     [
         {
-            "text": "What is on the flower?",
-            "files": ["assets/bee.jpg"],
+            "text": "What is the capital of France?",
+            "files": [],
         }
     ],
 ]
 
 
 model_choices = [
-    # "echarlaix/SmolVLM2-2.2B-Instruct-openvino",
-    # "echarlaix/SmolVLM-256M-Instruct-openvino",
     "echarlaix/SmolVLM2-500M-Video-Instruct-openvino",
-    # "echarlaix/SmolVLM2-256M-Video-Instruct-openvino",
-    # "echarlaix/SmolVLM2-256M-Video-Instruct-openvino-8bit-woq-data-free",
-    # "echarlaix/SmolVLM2-256M-Video-Instruct-openvino-8bit-static",
     "echarlaix/SmolVLM2-500M-Video-Instruct-openvino-8bit-static",
     "echarlaix/SmolVLM2-500M-Video-Instruct-openvino-8bit-woq-data-free",
-    # "echarlaix/SmolVLM2-2.2B-Instruct-openvino-8bit-static",
 ]
 
 demo = gr.ChatInterface(
@@ -276,12 +271,12 @@ demo = gr.ChatInterface(
     multimodal=True,
     additional_inputs=[
         gr.Dropdown(model_choices, value=model_choices[0], label="Model ID"),
-        gr.Textbox(label="System Prompt", value="You are a helpful assistant."),
+        # gr.Textbox(label="System Prompt", value="You are a helpful assistant."),
         gr.Slider(label="Max New Tokens", minimum=100, maximum=2000, step=10, value=700),
     ],
     stop_btn=False,
     title="Fast quantized SmolVLM2 ⚡",
-    description="Play with a [SmolVLM2-500M-Video-Instruct](https://huggingface.co/echarlaix/SmolVLM2-500M-Video-Instruct-openvino) and its quantized counterpart. Weight Only Quantization and Static Quantization was applied with [Optimum Intel](https://github.com/huggingface/optimum-intel) NNCF integration. To get started, upload an image and text or try one of the examples. This demo runs on 4th Generation Intel Xeon (Sapphire Rapids) processors.",
+    description="Play with [SmolVLM2-500M-Video-Instruct](https://huggingface.co/echarlaix/SmolVLM2-500M-Video-Instruct-openvino) and its quantized variants: [SmolVLM2-500M-Video-Instruct-openvino-8bit-woq-data-free](https://huggingface.co/echarlaix/SmolVLM2-500M-Video-Instruct-openvino-8bit-woq-data-free) and [SmolVLM2-500M-Video-Instruct-openvino-8bit-static](https://huggingface.co/echarlaix/SmolVLM2-500M-Video-Instruct-openvino-8bit-static), obtained by applying Weight-Only Quantization and Static Quantization respectively with the [Optimum Intel](https://github.com/huggingface/optimum-intel) NNCF integration. To get started, upload an image and text or try one of the examples. This demo runs on 4th Generation Intel Xeon (Sapphire Rapids) processors.",
     examples=examples,
     run_examples_on_click=False,
     cache_examples=False,
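
For reference, the checkpoints kept in model_choices are pre-exported OpenVINO models. Below is a minimal standalone sketch of loading one of them and asking the bee-image question from the examples; it is an illustration only, not the Space's actual generate() implementation, and it assumes Optimum Intel's OVModelForVisualCausalLM together with the standard SmolVLM processor chat-template pattern (exact APIs may vary across optimum-intel / transformers versions).

# Illustrative only: load one of the quantized checkpoints listed in model_choices
# and run a single image + text query. Assumes optimum-intel is installed with its
# OpenVINO extras; the checkpoint is already exported and quantized, so no export
# step is needed here.
from PIL import Image
from transformers import AutoProcessor
from optimum.intel import OVModelForVisualCausalLM

model_id = "echarlaix/SmolVLM2-500M-Video-Instruct-openvino-8bit-woq-data-free"
processor = AutoProcessor.from_pretrained(model_id)
model = OVModelForVisualCausalLM.from_pretrained(model_id)

image = Image.open("assets/bee.jpg")  # same asset the Space uses in its examples
messages = [
    {
        "role": "user",
        "content": [
            {"type": "image"},
            {"type": "text", "text": "What is on the flower?"},
        ],
    },
]
# Render the chat template, then tokenize the text together with the image.
prompt = processor.apply_chat_template(messages, add_generation_prompt=True)
inputs = processor(text=prompt, images=[image], return_tensors="pt")

output_ids = model.generate(**inputs, max_new_tokens=512)
print(processor.batch_decode(output_ids, skip_special_tokens=True)[0])

Two notes on the change itself: the 8-bit variants referenced in the new description are presumably produced ahead of time with Optimum Intel's NNCF-backed quantization (weight-only for the data-free variant, static quantization with calibration data for the other); that export step is not part of this commit. And because gr.ChatInterface passes the values of additional_inputs to the chat function after message and history, commenting out the System Prompt textbox goes together with removing the system_prompt parameter from generate and hard-coding the prompt inside the function.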