prithivMLmods committed
Commit 280b089 · verified · 1 Parent(s): 949d571

Update app.py

Files changed (1)
  1. app.py +33 -3
app.py CHANGED
@@ -8,6 +8,35 @@ import spaces
 import cv2
 import numpy as np
 
+# Helper function to return a progress bar HTML snippet.
+def progress_bar_html(label: str) -> str:
+    return f'''
+<div style="display: flex; align-items: center;">
+    <span style="margin-right: 10px; font-size: 14px;">{label}</span>
+    <div style="width: 110px; height: 5px; background-color: #FFB6C1; border-radius: 2px; overflow: hidden;">
+        <div style="width: 100%; height: 100%; background-color: #FF69B4; animation: loading 1.5s linear infinite;"></div>
+    </div>
+</div>
+<style>
+@keyframes loading {{
+    0% {{ transform: translateX(-100%); }}
+    100% {{ transform: translateX(100%); }}
+}}
+</style>
+    '''
+
+#adding examples
+examples=[
+    [{"text": "Explain the Image", "files": ["examples/3.jpg"]}],
+    [{"text": "Transcription of the letter", "files": ["examples/222.png"]}],
+    [{"text": "@video-infer Explain the content of the Advertisement", "files": ["examples/videoplayback.mp4"]}],
+    [{"text": "@video-infer Explain the content of the video in detail", "files": ["examples/breakfast.mp4"]}],
+    [{"text": "@video-infer Describe the video", "files": ["examples/Missing.mp4"]}],
+    [{"text": "@video-infer Explain what is happening in this video ?", "files": ["examples/oreo.mp4"]}],
+    [{"text": "@video-infer Summarize the events in this video", "files": ["examples/sky.mp4"]}],
+    [{"text": "@video-infer What is in the video ?", "files": ["examples/redlight.mp4"]}],
+]
+
 # Helper: Downsample video to extract a fixed number of frames.
 def downsample_video(video_path, num_frames=10):
     cap = cv2.VideoCapture(video_path)
@@ -88,7 +117,7 @@ def model_inference(
         buffer = ""
         thread = Thread(target=model.generate, kwargs=generation_args)
         thread.start()
-        yield "..."
+        yield progress_bar_html("Processing Video with SmolVLM")
         for new_text in streamer:
             buffer += new_text
             time.sleep(0.01)
@@ -139,7 +168,7 @@ def model_inference(
         buffer = ""
         thread = Thread(target=model.generate, kwargs=generation_args)
         thread.start()
-        yield "..."
+        yield progress_bar_html("Processing Video with SmolVLM")
         for new_text in streamer:
             buffer += new_text
             time.sleep(0.01)
@@ -148,7 +177,8 @@ def model_inference(
 # Gradio ChatInterface: Allow both image and video file types.
 demo = gr.ChatInterface(
     fn=model_inference,
-    description="# **SmolVLM Video Infer**",
+    description="# **SmolVLM Video Infer `@video-infer for video understanding`**",
+    examples=examples,
     textbox=gr.MultimodalTextbox(
         label="Query Input",
         file_types=["image", "video"],