Spaces:
Running
on
Zero
Running
on
Zero
Update app.py
Browse files
app.py
CHANGED
@@ -8,6 +8,35 @@ import spaces
|
|
8 |
import cv2
|
9 |
import numpy as np
|
10 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
11 |
# Helper: Downsample video to extract a fixed number of frames.
|
12 |
def downsample_video(video_path, num_frames=10):
|
13 |
cap = cv2.VideoCapture(video_path)
|
@@ -88,7 +117,7 @@ def model_inference(
|
|
88 |
buffer = ""
|
89 |
thread = Thread(target=model.generate, kwargs=generation_args)
|
90 |
thread.start()
|
91 |
-
yield progress_bar_html("Processing Video with SmolVLM")
|
92 |
for new_text in streamer:
|
93 |
buffer += new_text
|
94 |
time.sleep(0.01)
|
@@ -139,7 +168,7 @@ def model_inference(
|
|
139 |
buffer = ""
|
140 |
thread = Thread(target=model.generate, kwargs=generation_args)
|
141 |
thread.start()
|
142 |
-
yield progress_bar_html("Processing Video with SmolVLM")
|
143 |
for new_text in streamer:
|
144 |
buffer += new_text
|
145 |
time.sleep(0.01)
|
@@ -148,7 +177,8 @@ def model_inference(
|
|
148 |
# Gradio ChatInterface: Allow both image and video file types.
|
149 |
demo = gr.ChatInterface(
|
150 |
fn=model_inference,
|
151 |
-
description="# **SmolVLM Video Infer `@video-infer for video understanding`**",
|
|
|
152 |
textbox=gr.MultimodalTextbox(
|
153 |
label="Query Input",
|
154 |
file_types=["image", "video"],
|
|
|
8 |
import cv2
|
9 |
import numpy as np
|
10 |
|
11 |
+
# Helper function to return a progress bar HTML snippet.
def progress_bar_html(
    label: str,
    primary_color: str = "#FF69B4",
    secondary_color: str = "#FFB6C1",
) -> str:
    """Return an HTML snippet rendering an animated, indeterminate progress bar.

    The snippet is streamed into the chat output as a lightweight "busy"
    indicator while the model generates (see the ``yield progress_bar_html(...)``
    call sites before the streamer loops).

    Args:
        label: Text displayed to the left of the bar.
        primary_color: Color of the moving bar segment. Defaults to the
            original hard-coded hot pink, so existing callers are unaffected.
        secondary_color: Track/background color of the bar. Defaults to the
            original light pink.

    Returns:
        An HTML string with embedded CSS (a ``@keyframes loading`` animation
        sliding the inner bar across the track every 1.5 s).
    """
    # Doubled braces ({{ }}) inside the f-string emit literal braces for CSS.
    return f'''
<div style="display: flex; align-items: center;">
    <span style="margin-right: 10px; font-size: 14px;">{label}</span>
    <div style="width: 110px; height: 5px; background-color: {secondary_color}; border-radius: 2px; overflow: hidden;">
        <div style="width: 100%; height: 100%; background-color: {primary_color}; animation: loading 1.5s linear infinite;"></div>
    </div>
</div>
<style>
@keyframes loading {{
    0% {{ transform: translateX(-100%); }}
    100% {{ transform: translateX(100%); }}
}}
</style>
'''
|
27 |
+
|
28 |
+
# Example prompts pre-populated in the Gradio ChatInterface.
# NOTE(review): entries prefixed with "@video-infer" are assumed to route to
# the video branch of model_inference (the branch is outside this view —
# confirm against the full app.py). The referenced media files are assumed
# to exist under ./examples in the Space repo — verify before deploying.
examples = [
    [{"text": "Explain the Image", "files": ["examples/3.jpg"]}],
    [{"text": "Transcription of the letter", "files": ["examples/222.png"]}],
    [{"text": "@video-infer Explain the content of the Advertisement", "files": ["examples/videoplayback.mp4"]}],
    [{"text": "@video-infer Explain the content of the video in detail", "files": ["examples/breakfast.mp4"]}],
    [{"text": "@video-infer Describe the video", "files": ["examples/Missing.mp4"]}],
    [{"text": "@video-infer Explain what is happening in this video ?", "files": ["examples/oreo.mp4"]}],
    [{"text": "@video-infer Summarize the events in this video", "files": ["examples/sky.mp4"]}],
    [{"text": "@video-infer What is in the video ?", "files": ["examples/redlight.mp4"]}],
]
|
39 |
+
|
40 |
# Helper: Downsample video to extract a fixed number of frames.
|
41 |
def downsample_video(video_path, num_frames=10):
|
42 |
cap = cv2.VideoCapture(video_path)
|
|
|
117 |
buffer = ""
|
118 |
thread = Thread(target=model.generate, kwargs=generation_args)
|
119 |
thread.start()
|
120 |
+
yield progress_bar_html("Processing Video with SmolVLM")
|
121 |
for new_text in streamer:
|
122 |
buffer += new_text
|
123 |
time.sleep(0.01)
|
|
|
168 |
buffer = ""
|
169 |
thread = Thread(target=model.generate, kwargs=generation_args)
|
170 |
thread.start()
|
171 |
+
yield progress_bar_html("Processing Video with SmolVLM")
|
172 |
for new_text in streamer:
|
173 |
buffer += new_text
|
174 |
time.sleep(0.01)
|
|
|
177 |
# Gradio ChatInterface: Allow both image and video file types.
|
178 |
demo = gr.ChatInterface(
|
179 |
fn=model_inference,
|
180 |
+
description="# **SmolVLM Video Infer `@video-infer for video understanding`**",
|
181 |
+
examples=examples,
|
182 |
textbox=gr.MultimodalTextbox(
|
183 |
label="Query Input",
|
184 |
file_types=["image", "video"],
|