Spaces:
Running
on
Zero
Running
on
Zero
Update app.py
Browse files
app.py
CHANGED
@@ -1,4 +1,11 @@
|
|
1 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
2 |
import gradio as gr
|
3 |
|
4 |
from kokoro import KPipeline
|
@@ -18,8 +25,8 @@ class GradioBackend:
|
|
18 |
def __call__(self, message: str = None, history: list[str] = None, state: dict = {}, mode: str = 'Real-Time Commentary', **kwargs):
|
19 |
return getattr(self.infer, self.mode2api[mode])(message=message, history=history, state=state, **kwargs)
|
20 |
|
21 |
-
gradio_backend = None
|
22 |
-
|
23 |
with gr.Blocks() as demo:
|
24 |
gr.Markdown("## LiveCC Conversation and Real-Time Commentary - Gradio Demo")
|
25 |
gr.Markdown("### [LiveCC: Learning Video LLM with Streaming Speech Transcription at Scale (CVPR 2025)](https://showlab.github.io/livecc/)")
|
@@ -40,7 +47,6 @@ with gr.Blocks() as demo:
|
|
40 |
visible=True,
|
41 |
sources=['upload'],
|
42 |
autoplay=True,
|
43 |
-
include_audio=False,
|
44 |
width=720,
|
45 |
height=480
|
46 |
)
|
@@ -57,34 +63,37 @@ with gr.Blocks() as demo:
|
|
57 |
with gr.Row():
|
58 |
gr_radio_mode = gr.Radio(label="Select Mode", choices=["Real-Time Commentary", "Conversation"], elem_id="gr_radio_mode", value='Real-Time Commentary', interactive=True)
|
59 |
|
60 |
-
@spaces.GPU
|
61 |
def gr_chatinterface_fn(message, history, state, video_path, mode):
|
62 |
global gradio_backend
|
63 |
-
yield '(initializing model, thanks for waiting...)'
|
64 |
if gradio_backend is None:
|
|
|
65 |
gradio_backend = GradioBackend()
|
|
|
66 |
state['video_path'] = video_path
|
67 |
-
yield '(finished initialization, responding...)'
|
68 |
if mode != 'Conversation':
|
69 |
yield 'waiting video input...'
|
70 |
-
response, state = gradio_backend(message=message, history=history, state=state, mode=mode)
|
71 |
-
yield response
|
72 |
|
73 |
def gr_chatinterface_chatbot_clear_fn():
|
74 |
return {}, {}, 0, 0
|
75 |
gr_chatinterface = gr.ChatInterface(
|
76 |
fn=gr_chatinterface_fn,
|
77 |
type="messages",
|
78 |
-
additional_inputs=[gr_state, gr_video, gr_radio_mode]
|
|
|
79 |
)
|
80 |
gr_chatinterface.chatbot.clear(fn=gr_chatinterface_chatbot_clear_fn, outputs=[gr_video_state, gr_state, gr_static_trigger, gr_dynamic_trigger])
|
81 |
gr_clean_button.click(fn=lambda :[[], *gr_chatinterface_chatbot_clear_fn()], outputs=[gr_video_state, gr_state, gr_static_trigger, gr_dynamic_trigger])
|
82 |
|
83 |
def gr_for_streaming(history: list[gr.ChatMessage], video_state: dict, state: dict, mode: str, static_trigger: int, dynamic_trigger: int):
|
84 |
-
|
85 |
-
|
86 |
-
|
87 |
-
|
|
|
|
|
88 |
state.update(video_state)
|
89 |
query, assistant_waiting_message = None, None
|
90 |
for message in history[::-1]:
|
@@ -100,7 +109,7 @@ with gr.Blocks() as demo:
|
|
100 |
elif message['content'] == GradioBackend.waiting_video_response:
|
101 |
assistant_waiting_message = message
|
102 |
|
103 |
-
for (start_timestamp, stop_timestamp), response, state in gradio_backend(query=query, state=state, mode=mode):
|
104 |
if start_timestamp >= 0:
|
105 |
response_with_timestamp = f'{start_timestamp:.1f}s-{stop_timestamp:.1f}s: {response}'
|
106 |
if assistant_waiting_message is None:
|
@@ -109,7 +118,10 @@ with gr.Blocks() as demo:
|
|
109 |
assistant_waiting_message['content'] = response_with_timestamp
|
110 |
assistant_waiting_message = None
|
111 |
yield history, state, dynamic_trigger
|
112 |
-
|
|
|
|
|
|
|
113 |
|
114 |
js_video_timestamp_fetcher = """
|
115 |
(state, video_state) => {
|
|
|
1 |
+
hf_spaces = False
|
2 |
+
js_monitor = False # If False, ignore the actual video timestamp reported by the front end. Suitable for environments with unavoidable latency (e.g. HF Spaces).
|
3 |
+
if hf_spaces:
|
4 |
+
try:
|
5 |
+
import spaces
|
6 |
+
except Exception as e:
|
7 |
+
print(e)
|
8 |
+
import os
|
9 |
import gradio as gr
|
10 |
|
11 |
from kokoro import KPipeline
|
|
|
25 |
def __call__(self, message: str = None, history: list[str] = None, state: dict = {}, mode: str = 'Real-Time Commentary', **kwargs):
|
26 |
return getattr(self.infer, self.mode2api[mode])(message=message, history=history, state=state, **kwargs)
|
27 |
|
28 |
+
gradio_backend = None if hf_spaces else GradioBackend()
|
29 |
+
|
30 |
with gr.Blocks() as demo:
|
31 |
gr.Markdown("## LiveCC Conversation and Real-Time Commentary - Gradio Demo")
|
32 |
gr.Markdown("### [LiveCC: Learning Video LLM with Streaming Speech Transcription at Scale (CVPR 2025)](https://showlab.github.io/livecc/)")
|
|
|
47 |
visible=True,
|
48 |
sources=['upload'],
|
49 |
autoplay=True,
|
|
|
50 |
width=720,
|
51 |
height=480
|
52 |
)
|
|
|
63 |
with gr.Row():
|
64 |
gr_radio_mode = gr.Radio(label="Select Mode", choices=["Real-Time Commentary", "Conversation"], elem_id="gr_radio_mode", value='Real-Time Commentary', interactive=True)
|
65 |
|
66 |
+
# @spaces.GPU
|
67 |
def gr_chatinterface_fn(message, history, state, video_path, mode):
|
68 |
global gradio_backend
|
|
|
69 |
if gradio_backend is None:
|
70 |
+
yield '(ZeroGPU needs to initialize model under @spaces.GPU, thanks for waiting...)', state
|
71 |
gradio_backend = GradioBackend()
|
72 |
+
yield '(finished initialization, responding...)', state
|
73 |
state['video_path'] = video_path
|
|
|
74 |
if mode != 'Conversation':
|
75 |
yield 'waiting video input...'
|
76 |
+
response, state = gradio_backend(message=message, history=history, state=state, mode=mode, hf_spaces=hf_spaces)
|
77 |
+
yield response, state
|
78 |
|
79 |
def gr_chatinterface_chatbot_clear_fn():
|
80 |
return {}, {}, 0, 0
|
81 |
gr_chatinterface = gr.ChatInterface(
|
82 |
fn=gr_chatinterface_fn,
|
83 |
type="messages",
|
84 |
+
additional_inputs=[gr_state, gr_video, gr_radio_mode],
|
85 |
+
additional_outputs=[gr_state]
|
86 |
)
|
87 |
gr_chatinterface.chatbot.clear(fn=gr_chatinterface_chatbot_clear_fn, outputs=[gr_video_state, gr_state, gr_static_trigger, gr_dynamic_trigger])
|
88 |
gr_clean_button.click(fn=lambda :[[], *gr_chatinterface_chatbot_clear_fn()], outputs=[gr_video_state, gr_state, gr_static_trigger, gr_dynamic_trigger])
|
89 |
|
90 |
def gr_for_streaming(history: list[gr.ChatMessage], video_state: dict, state: dict, mode: str, static_trigger: int, dynamic_trigger: int):
|
91 |
+
if static_trigger == 0:
|
92 |
+
yield [], {}, dynamic_trigger
|
93 |
+
return
|
94 |
+
yield history + [gr.ChatMessage(role="assistant", content='Loading video... thanks for waiting...')], state, dynamic_trigger
|
95 |
+
if not js_monitor:
|
96 |
+
video_state['video_timestamp'] = 19260817 # 👓
|
97 |
state.update(video_state)
|
98 |
query, assistant_waiting_message = None, None
|
99 |
for message in history[::-1]:
|
|
|
109 |
elif message['content'] == GradioBackend.waiting_video_response:
|
110 |
assistant_waiting_message = message
|
111 |
|
112 |
+
for (start_timestamp, stop_timestamp), response, state in gradio_backend(query=query, state=state, mode=mode, hf_spaces=hf_spaces):
|
113 |
if start_timestamp >= 0:
|
114 |
response_with_timestamp = f'{start_timestamp:.1f}s-{stop_timestamp:.1f}s: {response}'
|
115 |
if assistant_waiting_message is None:
|
|
|
118 |
assistant_waiting_message['content'] = response_with_timestamp
|
119 |
assistant_waiting_message = None
|
120 |
yield history, state, dynamic_trigger
|
121 |
+
if js_monitor:
|
122 |
+
yield history, state, 1 - dynamic_trigger
|
123 |
+
else:
|
124 |
+
yield history, state, dynamic_trigger
|
125 |
|
126 |
js_video_timestamp_fetcher = """
|
127 |
(state, video_state) => {
|