Spaces:

chenjoya
/

LiveCC

Running on Zero

App Files Files Community

chenjoya committed on Apr 23

Commit

292389d

verified ·

1 Parent(s): ea5bc09

Update app.py

Browse files

Files changed (1) hide show

app.py +28 -16

app.py CHANGED Viewed

@@ -1,4 +1,11 @@
-import spaces, os
 import gradio as gr
 from kokoro import KPipeline
@@ -18,8 +25,8 @@ class GradioBackend:
     def __call__(self, message: str = None, history: list[str] = None, state: dict = {}, mode: str = 'Real-Time Commentary', **kwargs):
         return getattr(self.infer, self.mode2api[mode])(message=message, history=history, state=state, **kwargs)
-gradio_backend = None
 with gr.Blocks() as demo:
     gr.Markdown("## LiveCC Conversation and Real-Time Commentary - Gradio Demo")
     gr.Markdown("### [LiveCC: Learning Video LLM with Streaming Speech Transcription at Scale (CVPR 2025)](https://showlab.github.io/livecc/)")
@@ -40,7 +47,6 @@ with gr.Blocks() as demo:
                 visible=True,
                 sources=['upload'],
                 autoplay=True,
-                include_audio=False,
                 width=720,
                 height=480
             )
@@ -57,34 +63,37 @@ with gr.Blocks() as demo:
             with gr.Row():
                 gr_radio_mode = gr.Radio(label="Select Mode", choices=["Real-Time Commentary", "Conversation"], elem_id="gr_radio_mode", value='Real-Time Commentary', interactive=True)
-            @spaces.GPU
             def gr_chatinterface_fn(message, history, state, video_path, mode):
                 global gradio_backend
-                yield '(initializing model, thanks for waiting...)'
                 if gradio_backend is None:
                     gradio_backend = GradioBackend()
                 state['video_path'] = video_path
-                yield '(finished initialization, responding...)'
                 if mode != 'Conversation':
                     yield 'waiting video input...'
-                response, state = gradio_backend(message=message, history=history, state=state, mode=mode)
-                yield response
             def gr_chatinterface_chatbot_clear_fn():
                 return {}, {}, 0, 0
             gr_chatinterface = gr.ChatInterface(
                 fn=gr_chatinterface_fn,
                 type="messages",
-                additional_inputs=[gr_state, gr_video, gr_radio_mode]
             )
             gr_chatinterface.chatbot.clear(fn=gr_chatinterface_chatbot_clear_fn, outputs=[gr_video_state, gr_state, gr_static_trigger, gr_dynamic_trigger])
             gr_clean_button.click(fn=lambda :[[], *gr_chatinterface_chatbot_clear_fn()], outputs=[gr_video_state, gr_state, gr_static_trigger, gr_dynamic_trigger])
             def gr_for_streaming(history: list[gr.ChatMessage], video_state: dict, state: dict, mode: str, static_trigger: int, dynamic_trigger: int):
-                # if static_trigger == 0:
-                #     return gr_chatinterface_chatbot_clear_fn()
-                # if video_state['video_path'] != state.get('video_path', None):
-                #     return gr_chatinterface_chatbot_clear_fn()
                 state.update(video_state)
                 query, assistant_waiting_message = None, None
                 for message in history[::-1]:
@@ -100,7 +109,7 @@ with gr.Blocks() as demo:
                     elif message['content'] == GradioBackend.waiting_video_response:
                         assistant_waiting_message = message
-                for (start_timestamp, stop_timestamp), response, state in gradio_backend(query=query, state=state, mode=mode):
                     if start_timestamp >= 0:
                         response_with_timestamp = f'{start_timestamp:.1f}s-{stop_timestamp:.1f}s: {response}'
                         if assistant_waiting_message is None:
@@ -109,7 +118,10 @@ with gr.Blocks() as demo:
                             assistant_waiting_message['content'] = response_with_timestamp
                             assistant_waiting_message = None
                         yield history, state, dynamic_trigger
-                yield history, state, 1 - dynamic_trigger
             js_video_timestamp_fetcher = """
                 (state, video_state) => {

+hf_spaces = False
+js_monitor = False # if False, will not care about the actual video timestamp in front end. Suitable for environment with unsolvable latency (e.g. hf spaces)
+if hf_spaces:
+    try:
+        import spaces
+    except Exception as e:
+        print(e)
+import os
 import gradio as gr
 from kokoro import KPipeline
     def __call__(self, message: str = None, history: list[str] = None, state: dict = {}, mode: str = 'Real-Time Commentary', **kwargs):
         return getattr(self.infer, self.mode2api[mode])(message=message, history=history, state=state, **kwargs)
+gradio_backend = None if hf_spaces else GradioBackend()
 with gr.Blocks() as demo:
     gr.Markdown("## LiveCC Conversation and Real-Time Commentary - Gradio Demo")
     gr.Markdown("### [LiveCC: Learning Video LLM with Streaming Speech Transcription at Scale (CVPR 2025)](https://showlab.github.io/livecc/)")
                 visible=True,
                 sources=['upload'],
                 autoplay=True,
                 width=720,
                 height=480
             )
             with gr.Row():
                 gr_radio_mode = gr.Radio(label="Select Mode", choices=["Real-Time Commentary", "Conversation"], elem_id="gr_radio_mode", value='Real-Time Commentary', interactive=True)
+            # @spaces.GPU
             def gr_chatinterface_fn(message, history, state, video_path, mode):
                 global gradio_backend
                 if gradio_backend is None:
+                    yield '(ZeroGPU needs to initialize model under @spaces.GPU, thanks for waiting...)', state
                     gradio_backend = GradioBackend()
+                    yield '(finished initialization, responding...)', state
                 state['video_path'] = video_path
                 if mode != 'Conversation':
                     yield 'waiting video input...'
+                response, state = gradio_backend(message=message, history=history, state=state, mode=mode, hf_spaces=hf_spaces)
+                yield response, state
             def gr_chatinterface_chatbot_clear_fn():
                 return {}, {}, 0, 0
             gr_chatinterface = gr.ChatInterface(
                 fn=gr_chatinterface_fn,
                 type="messages",
+                additional_inputs=[gr_state, gr_video, gr_radio_mode],
+                additional_outputs=[gr_state]
             )
             gr_chatinterface.chatbot.clear(fn=gr_chatinterface_chatbot_clear_fn, outputs=[gr_video_state, gr_state, gr_static_trigger, gr_dynamic_trigger])
             gr_clean_button.click(fn=lambda :[[], *gr_chatinterface_chatbot_clear_fn()], outputs=[gr_video_state, gr_state, gr_static_trigger, gr_dynamic_trigger])
             def gr_for_streaming(history: list[gr.ChatMessage], video_state: dict, state: dict, mode: str, static_trigger: int, dynamic_trigger: int):
+                if static_trigger == 0:
+                    yield [], {}, dynamic_trigger
+                    return
+                yield history + [gr.ChatMessage(role="assistant", content='Loading video... thanks for waiting...')], state, dynamic_trigger
+                if not js_monitor:
+                    video_state['video_timestamp'] = 19260817 # 👓
                 state.update(video_state)
                 query, assistant_waiting_message = None, None
                 for message in history[::-1]:
                     elif message['content'] == GradioBackend.waiting_video_response:
                         assistant_waiting_message = message
+                for (start_timestamp, stop_timestamp), response, state in gradio_backend(query=query, state=state, mode=mode, hf_spaces=hf_spaces):
                     if start_timestamp >= 0:
                         response_with_timestamp = f'{start_timestamp:.1f}s-{stop_timestamp:.1f}s: {response}'
                         if assistant_waiting_message is None:
                             assistant_waiting_message['content'] = response_with_timestamp
                             assistant_waiting_message = None
                         yield history, state, dynamic_trigger
+                if js_monitor:
+                    yield history, state, 1 - dynamic_trigger
+                else:
+                    yield history, state, dynamic_trigger
             js_video_timestamp_fetcher = """
                 (state, video_state) => {