Spaces:
Running
on
Zero
Running
on
Zero
Update app.py
Browse files
app.py
CHANGED
@@ -15,8 +15,8 @@ class GradioBackend:
|
|
15 |
self.infer = LiveCCDemoInfer(model_path)
|
16 |
self.audio_pipeline = KPipeline(lang_code='a')
|
17 |
|
18 |
-
def __call__(self,
|
19 |
-
return getattr(self.infer, self.mode2api[mode])(
|
20 |
|
21 |
gradio_backend = None
|
22 |
|
@@ -24,8 +24,8 @@ with gr.Blocks() as demo:
|
|
24 |
gr.Markdown("## LiveCC Conversation and Real-Time Commentary - Gradio Demo")
|
25 |
gr.Markdown("### [LiveCC: Learning Video LLM with Streaming Speech Transcription at Scale (CVPR 2025)](https://showlab.github.io/livecc/)")
|
26 |
gr.Markdown("1️⃣ Select Mode, Real-Time Commentary (LiveCC) or Conversation (Common QA)")
|
27 |
-
gr.Markdown("2️⃣🅰️ Real-Time Commentary: Input a query (optional) -> Click or upload a video
|
28 |
-
gr.Markdown("2️⃣🅱️ Conversation: Click or upload a video -> Input a query.")
|
29 |
gr.Markdown("*Web Gradio has unexpected latency (3s~5s). If you want to enjoy the very real-time experience, please deploy locally https://github.com/showlab/livecc*")
|
30 |
gr_state = gr.State({}, render=False) # control all useful state, including kv cache
|
31 |
gr_video_state = gr.JSON({}, visible=False) # only record video state, belong to gr_state but lightweight
|
@@ -60,23 +60,22 @@ with gr.Blocks() as demo:
|
|
60 |
@spaces.GPU
|
61 |
def gr_chatinterface_fn(message, history, state, video_path, mode):
|
62 |
global gradio_backend
|
63 |
-
yield '(initializing model, thanks for waiting...)'
|
64 |
if gradio_backend is None:
|
65 |
gradio_backend = GradioBackend()
|
66 |
state['video_path'] = video_path
|
67 |
-
yield '(finished initialization, responding...)'
|
68 |
if mode != 'Conversation':
|
69 |
-
yield 'waiting video input...'
|
70 |
-
response, state = gradio_backend(
|
71 |
-
yield response
|
72 |
|
73 |
def gr_chatinterface_chatbot_clear_fn():
|
74 |
return {}, {}, 0, 0
|
75 |
gr_chatinterface = gr.ChatInterface(
|
76 |
fn=gr_chatinterface_fn,
|
77 |
type="messages",
|
78 |
-
additional_inputs=[gr_state, gr_video, gr_radio_mode]
|
79 |
-
additional_outputs=[gr_state],
|
80 |
)
|
81 |
gr_chatinterface.chatbot.clear(fn=gr_chatinterface_chatbot_clear_fn, outputs=[gr_video_state, gr_state, gr_static_trigger, gr_dynamic_trigger])
|
82 |
gr_clean_button.click(fn=lambda :[[], *gr_chatinterface_chatbot_clear_fn()], outputs=[gr_video_state, gr_state, gr_static_trigger, gr_dynamic_trigger])
|
|
|
15 |
self.infer = LiveCCDemoInfer(model_path)
|
16 |
self.audio_pipeline = KPipeline(lang_code='a')
|
17 |
|
18 |
+
def __call__(self, message: str = None, history: list[str] = None, state: dict = {}, mode: str = 'Real-Time Commentary', **kwargs):
|
19 |
+
return getattr(self.infer, self.mode2api[mode])(message=message, history=history, state=state, **kwargs)
|
20 |
|
21 |
gradio_backend = None
|
22 |
|
|
|
24 |
gr.Markdown("## LiveCC Conversation and Real-Time Commentary - Gradio Demo")
|
25 |
gr.Markdown("### [LiveCC: Learning Video LLM with Streaming Speech Transcription at Scale (CVPR 2025)](https://showlab.github.io/livecc/)")
|
26 |
gr.Markdown("1️⃣ Select Mode, Real-Time Commentary (LiveCC) or Conversation (Common QA)")
|
27 |
+
gr.Markdown("2️⃣🅰️ **Real-Time Commentary: Input a query (optional) -> Click or upload a video**.")
|
28 |
+
gr.Markdown("2️⃣🅱️ **Conversation: Click or upload a video -> Input a query**. But as the past_key_values support in ZeroGPU is not good, multi-turn conversation could be slower.")
|
29 |
gr.Markdown("*Web Gradio has unexpected latency (3s~5s). If you want to enjoy the very real-time experience, please deploy locally https://github.com/showlab/livecc*")
|
30 |
gr_state = gr.State({}, render=False) # control all useful state, including kv cache
|
31 |
gr_video_state = gr.JSON({}, visible=False) # only record video state, belong to gr_state but lightweight
|
|
|
60 |
@spaces.GPU
|
61 |
def gr_chatinterface_fn(message, history, state, video_path, mode):
|
62 |
global gradio_backend
|
63 |
+
yield '(initializing model, thanks for waiting...)'
|
64 |
if gradio_backend is None:
|
65 |
gradio_backend = GradioBackend()
|
66 |
state['video_path'] = video_path
|
67 |
+
yield '(finished initialization, responding...)'
|
68 |
if mode != 'Conversation':
|
69 |
+
yield 'waiting video input...'
|
70 |
+
response, state = gradio_backend(message=message, history=history, state=state, mode=mode)
|
71 |
+
yield response
|
72 |
|
73 |
def gr_chatinterface_chatbot_clear_fn():
|
74 |
return {}, {}, 0, 0
|
75 |
gr_chatinterface = gr.ChatInterface(
|
76 |
fn=gr_chatinterface_fn,
|
77 |
type="messages",
|
78 |
+
additional_inputs=[gr_state, gr_video, gr_radio_mode]
|
|
|
79 |
)
|
80 |
gr_chatinterface.chatbot.clear(fn=gr_chatinterface_chatbot_clear_fn, outputs=[gr_video_state, gr_state, gr_static_trigger, gr_dynamic_trigger])
|
81 |
gr_clean_button.click(fn=lambda :[[], *gr_chatinterface_chatbot_clear_fn()], outputs=[gr_video_state, gr_state, gr_static_trigger, gr_dynamic_trigger])
|