prithivMLmods committed on
Commit 0109e78 · verified · 1 Parent(s): 2c8da8a

Update app.py

Files changed (1)
  1. app.py +6 -9
app.py CHANGED
@@ -1,5 +1,5 @@
  import gradio as gr
- from transformers import AutoProcessor, AutoModelForVision2Seq, AutoModelForImageTextToText, TextIteratorStreamer
+ from transformers import AutoProcessor, AutoModelForVision2Seq, TextIteratorStreamer
  from threading import Thread
  import re
  import time
@@ -7,12 +7,10 @@ from PIL import Image
  import torch
  import spaces

- # Load processor and model
- processor = AutoProcessor.from_pretrained("HuggingFaceTB/SmolVLM2-2.2B-Instruct")
- model = AutoModelForImageTextToText.from_pretrained(
-     "HuggingFaceTB/SmolVLM2-2.2B-Instruct",
-     torch_dtype=torch.bfloat16,
- ).to("cuda")
+ processor = AutoProcessor.from_pretrained("HuggingFaceTB/SmolVLM-Instruct")
+ model = AutoModelForVision2Seq.from_pretrained("HuggingFaceTB/SmolVLM-Instruct",
+     torch_dtype=torch.bfloat16,
+ ).to("cuda")

  @spaces.GPU
  def model_inference(
@@ -80,8 +78,7 @@ def model_inference(
  # Define the ChatInterface without examples.
  demo = gr.ChatInterface(
      fn=model_inference,
-     title="SmolVLM: Small yet Mighty 💫",
-     description="Play with [HuggingFaceTB/SmolVLM-Instruct](https://huggingface.co/HuggingFaceTB/SmolVLM-Instruct) in this demo. To get started, upload an image and text.",
+     description="# **SmolVLM Video Infer**",
      textbox=gr.MultimodalTextbox(label="Query Input", file_types=["image"], file_count="multiple"),
      stop_btn="Stop Generation",
      multimodal=True,
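
For context, a minimal sketch of how the pieces imported in this commit typically fit together at inference time: the processor builds a chat-template prompt, model.generate runs in a background Thread, and TextIteratorStreamer yields partial text that a gr.ChatInterface can render incrementally. The model loading mirrors the new code above, but the stream_reply helper, its arguments, the single-image prompt, and max_new_tokens=512 are illustrative assumptions, not the Space's actual model_inference body.

# Illustrative sketch only -- not the Space's actual model_inference implementation.
import torch
from threading import Thread
from PIL import Image
from transformers import AutoProcessor, AutoModelForVision2Seq, TextIteratorStreamer

processor = AutoProcessor.from_pretrained("HuggingFaceTB/SmolVLM-Instruct")
model = AutoModelForVision2Seq.from_pretrained(
    "HuggingFaceTB/SmolVLM-Instruct",
    torch_dtype=torch.bfloat16,
).to("cuda")

def stream_reply(image_path, question):
    # Hypothetical helper: build a single-image chat prompt and stream the answer.
    messages = [{
        "role": "user",
        "content": [
            {"type": "image"},
            {"type": "text", "text": question},
        ],
    }]
    prompt = processor.apply_chat_template(messages, add_generation_prompt=True)
    inputs = processor(
        text=prompt,
        images=[Image.open(image_path)],
        return_tensors="pt",
    ).to("cuda")

    # Generate in a background thread so the streamer can be consumed as tokens arrive.
    streamer = TextIteratorStreamer(
        processor.tokenizer, skip_prompt=True, skip_special_tokens=True
    )
    Thread(
        target=model.generate,
        kwargs=dict(**inputs, streamer=streamer, max_new_tokens=512),
    ).start()

    buffer = ""
    for new_text in streamer:
        buffer += new_text
        yield buffer  # gr.ChatInterface renders each progressively longer string

A generator like this is what gr.ChatInterface(fn=..., multimodal=True) expects from its fn: each yielded string replaces the bot message in place, which is why the buffer accumulates rather than yielding token fragments.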