prithivMLmods committed on
Commit 0109e78 · verified · 1 Parent(s): 2c8da8a

Update app.py

Files changed (1)
  1. app.py +6 -9
app.py CHANGED
@@ -1,5 +1,5 @@
  import gradio as gr
- from transformers import AutoProcessor, AutoModelForVision2Seq, AutoModelForImageTextToText, TextIteratorStreamer
+ from transformers import AutoProcessor, AutoModelForVision2Seq, TextIteratorStreamer
  from threading import Thread
  import re
  import time
@@ -7,12 +7,10 @@ from PIL import Image
  import torch
  import spaces

- # Load processor and model
- processor = AutoProcessor.from_pretrained("HuggingFaceTB/SmolVLM2-2.2B-Instruct")
- model = AutoModelForImageTextToText.from_pretrained(
-     "HuggingFaceTB/SmolVLM2-2.2B-Instruct",
-     torch_dtype=torch.bfloat16,
- ).to("cuda")
+ processor = AutoProcessor.from_pretrained("HuggingFaceTB/SmolVLM-Instruct")
+ model = AutoModelForVision2Seq.from_pretrained("HuggingFaceTB/SmolVLM-Instruct",
+     torch_dtype=torch.bfloat16,
+ ).to("cuda")

  @spaces.GPU
  def model_inference(
@@ -80,8 +78,7 @@ def model_inference(
  # Define the ChatInterface without examples.
  demo = gr.ChatInterface(
      fn=model_inference,
-     title="SmolVLM: Small yet Mighty 💫",
-     description="Play with [HuggingFaceTB/SmolVLM-Instruct](https://huggingface.co/HuggingFaceTB/SmolVLM-Instruct) in this demo. To get started, upload an image and text.",
+     description="# **SmolVLM Video Infer**",
      textbox=gr.MultimodalTextbox(label="Query Input", file_types=["image"], file_count="multiple"),
      stop_btn="Stop Generation",
      multimodal=True,
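
For context, a minimal sketch of how the pieces imported in this commit typically fit together at inference time: the processor builds a chat-template prompt, model.generate runs in a background Thread, and TextIteratorStreamer yields partial text that a gr.ChatInterface can render incrementally. The model loading mirrors the new code above, but the stream_reply helper, its arguments, the single-image prompt, and max_new_tokens=512 are illustrative assumptions, not the Space's actual model_inference body.

# Illustrative sketch only -- not the Space's actual model_inference implementation.
import torch
from threading import Thread
from PIL import Image
from transformers import AutoProcessor, AutoModelForVision2Seq, TextIteratorStreamer

processor = AutoProcessor.from_pretrained("HuggingFaceTB/SmolVLM-Instruct")
model = AutoModelForVision2Seq.from_pretrained(
    "HuggingFaceTB/SmolVLM-Instruct",
    torch_dtype=torch.bfloat16,
).to("cuda")

def stream_reply(image_path, question):
    # Hypothetical helper: build a single-image chat prompt and stream the answer.
    messages = [{
        "role": "user",
        "content": [
            {"type": "image"},
            {"type": "text", "text": question},
        ],
    }]
    prompt = processor.apply_chat_template(messages, add_generation_prompt=True)
    inputs = processor(
        text=prompt,
        images=[Image.open(image_path)],
        return_tensors="pt",
    ).to("cuda")

    # Generate in a background thread so the streamer can be consumed as tokens arrive.
    streamer = TextIteratorStreamer(
        processor.tokenizer, skip_prompt=True, skip_special_tokens=True
    )
    Thread(
        target=model.generate,
        kwargs=dict(**inputs, streamer=streamer, max_new_tokens=512),
    ).start()

    buffer = ""
    for new_text in streamer:
        buffer += new_text
        yield buffer  # gr.ChatInterface renders each progressively longer string

A generator like this is what gr.ChatInterface(fn=..., multimodal=True) expects from its fn: each yielded string replaces the bot message in place, which is why the buffer accumulates rather than yielding token fragments.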