Spaces:

chenjoya
/

LiveCC

Running on Zero

App Files Community

chenjoya committed on Apr 23

Commit

8d85e56

verified ·

1 Parent(s): 49216ee

Update app.py

Browse files

Files changed (1) hide show

app.py +10 -11

app.py CHANGED Viewed

@@ -15,8 +15,8 @@ class GradioBackend:
         self.infer = LiveCCDemoInfer(model_path)
         self.audio_pipeline = KPipeline(lang_code='a')
-    def __call__(self, query: str = None, state: dict = {}, mode: str = 'Real-Time Commentary', **kwargs):
-        return getattr(self.infer, self.mode2api[mode])(query=query, state=state, **kwargs)
 gradio_backend = None
@@ -24,8 +24,8 @@ with gr.Blocks() as demo:
     gr.Markdown("## LiveCC Conversation and Real-Time Commentary - Gradio Demo")
     gr.Markdown("### [LiveCC: Learning Video LLM with Streaming Speech Transcription at Scale (CVPR 2025)](https://showlab.github.io/livecc/)")
     gr.Markdown("1️⃣ Select Mode, Real-Time Commentary (LiveCC) or Conversation (Common QA)")
-    gr.Markdown("2️⃣🅰️ Real-Time Commentary:  Input a query (optional) -> Click or upload a video.")
-    gr.Markdown("2️⃣🅱️ Conversation: Click or upload a video -> Input a query.")
     gr.Markdown("*Web Gradio has unexpected latency (3s~5s). If you want to enjoy the very real-time experience, please deploy locally https://github.com/showlab/livecc*")
     gr_state = gr.State({}, render=False) # control all useful state, including kv cache
     gr_video_state = gr.JSON({}, visible=False) # only record video state, belong to gr_state but lightweight
@@ -60,23 +60,22 @@ with gr.Blocks() as demo:
             @spaces.GPU
             def gr_chatinterface_fn(message, history, state, video_path, mode):
                 global gradio_backend
-                yield '(initializing model, thanks for waiting...)', state
                 if gradio_backend is None:
                     gradio_backend = GradioBackend()
                 state['video_path'] = video_path
-                yield '(finished initialization, responding...)', state
                 if mode != 'Conversation':
-                    yield 'waiting video input...', state
-                response, state = gradio_backend(query=message, state=state, mode=mode)
-                yield response, {}
             def gr_chatinterface_chatbot_clear_fn():
                 return {}, {}, 0, 0
             gr_chatinterface = gr.ChatInterface(
                 fn=gr_chatinterface_fn,
                 type="messages",
-                additional_inputs=[gr_state, gr_video, gr_radio_mode],
-                additional_outputs=[gr_state],
             )
             gr_chatinterface.chatbot.clear(fn=gr_chatinterface_chatbot_clear_fn, outputs=[gr_video_state, gr_state, gr_static_trigger, gr_dynamic_trigger])
             gr_clean_button.click(fn=lambda :[[], *gr_chatinterface_chatbot_clear_fn()], outputs=[gr_video_state, gr_state, gr_static_trigger, gr_dynamic_trigger])

         self.infer = LiveCCDemoInfer(model_path)
         self.audio_pipeline = KPipeline(lang_code='a')
+    def __call__(self, message: str = None, history: list[str] = None, state: dict = {}, mode: str = 'Real-Time Commentary', **kwargs):
+        return getattr(self.infer, self.mode2api[mode])(message=message, history=history, state=state, **kwargs)
 gradio_backend = None
     gr.Markdown("## LiveCC Conversation and Real-Time Commentary - Gradio Demo")
     gr.Markdown("### [LiveCC: Learning Video LLM with Streaming Speech Transcription at Scale (CVPR 2025)](https://showlab.github.io/livecc/)")
     gr.Markdown("1️⃣ Select Mode, Real-Time Commentary (LiveCC) or Conversation (Common QA)")
+    gr.Markdown("2️⃣🅰️ **Real-Time Commentary:  Input a query (optional) -> Click or upload a video**.")
+    gr.Markdown("2️⃣🅱️ **Conversation: Click or upload a video -> Input a query**. But as the past_key_values support in ZeroGPU is not good, multi-turn conversation could be slower.")
     gr.Markdown("*Web Gradio has unexpected latency (3s~5s). If you want to enjoy the very real-time experience, please deploy locally https://github.com/showlab/livecc*")
     gr_state = gr.State({}, render=False) # control all useful state, including kv cache
     gr_video_state = gr.JSON({}, visible=False) # only record video state, belong to gr_state but lightweight
             @spaces.GPU
             def gr_chatinterface_fn(message, history, state, video_path, mode):
                 global gradio_backend
+                yield '(initializing model, thanks for waiting...)'
                 if gradio_backend is None:
                     gradio_backend = GradioBackend()
                 state['video_path'] = video_path
+                yield '(finished initialization, responding...)'
                 if mode != 'Conversation':
+                    yield 'waiting video input...'
+                response, state = gradio_backend(message=message, history=history, state=state, mode=mode)
+                yield response
             def gr_chatinterface_chatbot_clear_fn():
                 return {}, {}, 0, 0
             gr_chatinterface = gr.ChatInterface(
                 fn=gr_chatinterface_fn,
                 type="messages",
+                additional_inputs=[gr_state, gr_video, gr_radio_mode]
             )
             gr_chatinterface.chatbot.clear(fn=gr_chatinterface_chatbot_clear_fn, outputs=[gr_video_state, gr_state, gr_static_trigger, gr_dynamic_trigger])
             gr_clean_button.click(fn=lambda :[[], *gr_chatinterface_chatbot_clear_fn()], outputs=[gr_video_state, gr_state, gr_static_trigger, gr_dynamic_trigger])