Spaces:

chenjoya
/

LiveCC

Running on Zero

App Files Files Community

chenjoya committed on Apr 23

Commit

ceeb44d

verified ·

1 Parent(s): ba2dfcb

Update app.py

Browse files

Files changed (1) hide show

app.py +17 -27

app.py CHANGED Viewed

@@ -6,29 +6,16 @@ from demo.infer import LiveCCDemoInfer
 model_path = 'chenjoya/LiveCC-7B-Instruct'
-class GradioBackend:
-    waiting_video_response = 'Waiting for video input...'
-    not_found_video_response = 'Video does not exist...'
-    mode2api = {
-        'Real-Time Commentary': 'live_cc',
-        'Conversation': 'video_qa'
-    }
-    def __init__(self):
-        # Delay model loading until we're in a GPU context
-        self.infer = None
-        self.audio_pipeline = None
-    def init_model(self, device):
-        # Instantiate inside GPU process
-        if self.infer is None:
-            self.infer = LiveCCDemoInfer(model_path, device=device)
-            self.audio_pipeline = KPipeline(lang_code='a')
-    def __call__(self, query: str = None, state: dict = {}, mode: str = 'Real-Time Commentary', **kwargs):
-        # Called only inside GPU process
-        response, state = getattr(self.infer, self.mode2api[mode])(query=query, state=state, **kwargs)
-        return response, state
 with gr.Blocks() as demo:
     gr.Markdown("## LiveCC Real-Time Commentary and Conversation - Gradio Demo")
@@ -68,12 +55,15 @@ with gr.Blocks() as demo:
                 gr_radio_mode = gr.Radio(label="Select Mode", choices=["Real-Time Commentary", "Conversation"], elem_id="gr_radio_mode", value='Real-Time Commentary', interactive=True)
             @spaces.GPU
-            def gr_chatinterface_fn(message, history, state, video_path, mode):
-                # Initialize backend and move model to GPU inside this process
-                global gradio_backend
-                gradio_backend.init_model(device='cuda')
                 state['video_path'] = video_path
-                return gradio_backend(query=message, state=state, mode=mode)
             def gr_chatinterface_chatbot_clear_fn():
                 return {}, {}, 0, 0
             gradio_backend = GradioBackend()

 model_path = 'chenjoya/LiveCC-7B-Instruct'
+def _init_infer():
+    # create a singleton LiveCCDemoInfer inside GPU
+    import torch
+    from kokoro import KPipeline
+    from demo.infer import LiveCCDemoInfer
+    infer = LiveCCDemoInfer(model_path, device='cuda')
+    return infer
+# We'll keep a module-global placeholder
+infer = None
 with gr.Blocks() as demo:
     gr.Markdown("## LiveCC Real-Time Commentary and Conversation - Gradio Demo")
                 gr_radio_mode = gr.Radio(label="Select Mode", choices=["Real-Time Commentary", "Conversation"], elem_id="gr_radio_mode", value='Real-Time Commentary', interactive=True)
             @spaces.GPU
+            def gr_chatinterface_fn(message, state, video_path, mode):
+                global infer
+                if infer is None:
+                    infer = _init_infer()
                 state['video_path'] = video_path
+                if mode == 'Conversation':
+                    return infer.video_qa(query=message, state=state)
+                else:
+                    return 'waiting video input...'
             def gr_chatinterface_chatbot_clear_fn():
                 return {}, {}, 0, 0
             gradio_backend = GradioBackend()