Spaces:

echarlaix
/

vision-langage-openvino

Running on CPU Upgrade

App Files Files Community

echarlaix HF Staff committed on Jul 15

Commit

23b0e0e

1 Parent(s): eae8f4b

update space description

Browse files

Files changed (2) hide show

README.md +2 -2
app.py +8 -13

README.md CHANGED Viewed

@@ -1,6 +1,6 @@
 ---
-title: Vision Langage Openvino
-emoji: 📊
 colorFrom: blue
 colorTo: green
 sdk: gradio

 ---
+title: Fast quantized SmolVLM2
+emoji: ⚡
 colorFrom: blue
 colorTo: green
 sdk: gradio

app.py CHANGED Viewed

@@ -162,7 +162,8 @@ def process_history(history: list[dict]) -> list[dict]:
 @torch.inference_mode()
-def generate(message: dict, history: list[dict], model_id: str, system_prompt: str = "", max_new_tokens: int = 512) -> Iterator[str]:
     update_model(model_id)
     processor = model_cache["processor"]
@@ -215,8 +216,8 @@ def generate(message: dict, history: list[dict], model_id: str, system_prompt: s
 examples = [
     [
         {
-            "text": "What is the capital of France?",
-            "files": [],
         }
     ],
     [
@@ -246,23 +247,17 @@ examples = [
     ],
     [
         {
-            "text": "What is on the flower?",
-            "files": ["assets/bee.jpg"],
         }
     ],
 ]
 model_choices = [
-    # "echarlaix/SmolVLM2-2.2B-Instruct-openvino",
-    # "echarlaix/SmolVLM-256M-Instruct-openvino",
     "echarlaix/SmolVLM2-500M-Video-Instruct-openvino",
-    # "echarlaix/SmolVLM2-256M-Video-Instruct-openvino",
-    # "echarlaix/SmolVLM2-256M-Video-Instruct-openvino-8bit-woq-data-free",
-    # "echarlaix/SmolVLM2-256M-Video-Instruct-openvino-8bit-static",
     "echarlaix/SmolVLM2-500M-Video-Instruct-openvino-8bit-static",
     "echarlaix/SmolVLM2-500M-Video-Instruct-openvino-8bit-woq-data-free",
-    # "echarlaix/SmolVLM2-2.2B-Instruct-openvino-8bit-static",
 ]
 demo = gr.ChatInterface(
@@ -276,12 +271,12 @@ demo = gr.ChatInterface(
     multimodal=True,
     additional_inputs=[
         gr.Dropdown(model_choices, value=model_choices[0], label="Model ID"),
-        gr.Textbox(label="System Prompt", value="You are a helpful assistant."),
         gr.Slider(label="Max New Tokens", minimum=100, maximum=2000, step=10, value=700),
     ],
     stop_btn=False,
     title="Fast quantized SmolVLM2 ⚡",
-    description="Play with a [SmolVLM2-500M-Video-Instruct](https://huggingface.co/echarlaix/SmolVLM2-500M-Video-Instruct-openvino) and its quantized counterpart. Weight Only Quantization and Static Quantization was applied with [Optimum Intel](https://github.com/huggingface/optimum-intel) NNCF integration. To get started, upload an image and text or try one of the examples. This demo runs on 4th Generation Intel Xeon (Sapphire Rapids) processors.",
     examples=examples,
     run_examples_on_click=False,
     cache_examples=False,

 @torch.inference_mode()
+def generate(message: dict, history: list[dict], model_id: str, max_new_tokens: int = 512) -> Iterator[str]:
+    system_prompt = "You are a helpful assistant."
     update_model(model_id)
     processor = model_cache["processor"]
 examples = [
     [
         {
+            "text": "What is on the flower?",
+            "files": ["assets/bee.jpg"],
         }
     ],
     [
     ],
     [
         {
+            "text": "What is the capital of France?",
+            "files": [],
         }
     ],
 ]
 model_choices = [
     "echarlaix/SmolVLM2-500M-Video-Instruct-openvino",
     "echarlaix/SmolVLM2-500M-Video-Instruct-openvino-8bit-static",
     "echarlaix/SmolVLM2-500M-Video-Instruct-openvino-8bit-woq-data-free",
 ]
 demo = gr.ChatInterface(
     multimodal=True,
     additional_inputs=[
         gr.Dropdown(model_choices, value=model_choices[0], label="Model ID"),
+        # gr.Textbox(label="System Prompt", value="You are a helpful assistant."),
         gr.Slider(label="Max New Tokens", minimum=100, maximum=2000, step=10, value=700),
     ],
     stop_btn=False,
     title="Fast quantized SmolVLM2 ⚡",
+    description="Play with a [SmolVLM2-500M-Video-Instruct](https://huggingface.co/echarlaix/SmolVLM2-500M-Video-Instruct-openvino) and its quantized variants : [SmolVLM2-500M-Video-Instruct-openvino-8bit-woq-data-free](https://huggingface.co/echarlaix/SmolVLM2-500M-Video-Instruct-openvino-8bit-woq-data-free) and [SmolVLM2-500M-Video-Instruct-openvino-8bit-static](https://huggingface.co/echarlaix/SmolVLM2-500M-Video-Instruct-openvino-8bit-static) both obtained by respectively applying Weight-Only Quantization and Static Quantization  using [Optimum Intel](https://github.com/huggingface/optimum-intel) NNCF integration. To get started, upload an image and text or try one of the examples. This demo runs on 4th Generation Intel Xeon (Sapphire Rapids) processors.",
     examples=examples,
     run_examples_on_click=False,
     cache_examples=False,