chenjoya committed on
Commit
dc317e6
·
verified ·
1 Parent(s): bc5cb8c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +18 -10
app.py CHANGED
@@ -14,15 +14,21 @@ class GradioBackend:
14
  'Conversation': 'video_qa'
15
  }
16
 
17
- def __init__(self, ):
18
- self.infer = LiveCCDemoInfer(model_path, device='cpu')
19
- self.audio_pipeline = KPipeline(lang_code='a')
 
20
 
21
- def __call__(self, query: str = None, state: dict = {}, mode: str = 'Real-Time Commentary', **kwargs):
22
- return getattr(self.infer, self.mode2api[mode])(query=query, state=state, **kwargs)
 
 
 
23
 
24
- def to(self, device):
25
- self.infer.model.to(device)
 
 
26
 
27
  with gr.Blocks() as demo:
28
  gr.Markdown("## LiveCC Real-Time Commentary and Conversation - Gradio Demo")
@@ -65,10 +71,12 @@ with gr.Blocks() as demo:
65
 
66
  @spaces.GPU
67
  def gr_chatinterface_fn(message, history, state, video_path, mode):
68
- gradio_backend.to('cuda')
 
 
69
  state['video_path'] = video_path
70
- response, state = gradio_backend(query=message, state=state, mode=mode)
71
- return response, state
72
  def gr_chatinterface_chatbot_clear_fn():
73
  return {}, {}, 0, 0
74
  gr_chatinterface = gr.ChatInterface(
 
14
  'Conversation': 'video_qa'
15
  }
16
 
17
def __init__(self):
    """Create an empty backend shell; no heavy objects are built here.

    Model and audio-pipeline construction is deferred to init_model()
    so that it happens inside the GPU worker process, not at import time.
    """
    self.infer = None            # LiveCCDemoInfer, built lazily by init_model()
    self.audio_pipeline = None   # KPipeline, built lazily by init_model()
21
 
22
def init_model(self, device):
    """Lazily construct the inference model and audio pipeline.

    Safe to call on every request: construction runs at most once. Must be
    invoked inside the GPU process (e.g. under @spaces.GPU) so device
    initialization stays in the worker.

    Args:
        device: device string passed to the model, e.g. 'cuda' or 'cpu'.
    """
    if self.infer is None:
        # Build into locals first and assign only on full success. The
        # original assigned self.infer before constructing KPipeline; if
        # KPipeline raised, the `if self.infer is None` guard would block
        # any retry and leave audio_pipeline permanently None.
        infer = LiveCCDemoInfer(model_path, device=device)
        audio_pipeline = KPipeline(lang_code='a')
        self.infer = infer
        self.audio_pipeline = audio_pipeline
27
 
28
def __call__(self, query: str = None, state: dict = None, mode: str = 'Real-Time Commentary', **kwargs):
    """Dispatch one request to the mode-specific inference API.

    Must be called only after init_model() has run (inside the GPU process).

    Args:
        query: user text for this turn, if any.
        state: per-session mutable state. Defaults to None and a fresh dict
            is created per call — the original `state: dict = {}` default is
            a shared mutable object, so state mutated by one session would
            leak into every later call that omitted `state`.
        mode: key into self.mode2api selecting the backend method
            ('Real-Time Commentary' or 'Conversation').
        **kwargs: forwarded verbatim to the selected API.

    Returns:
        (response, state) tuple as produced by the underlying API.
    """
    if state is None:
        state = {}
    api = getattr(self.infer, self.mode2api[mode])
    response, state = api(query=query, state=state, **kwargs)
    return response, state
32
 
33
  with gr.Blocks() as demo:
34
  gr.Markdown("## LiveCC Real-Time Commentary and Conversation - Gradio Demo")
 
71
 
72
@spaces.GPU
def gr_chatinterface_fn(message, history, state, video_path, mode):
    """Gradio chat handler; executes inside the ZeroGPU worker process.

    Lazily initializes the backend model on CUDA, records the active video
    path in the session state, then dispatches to the backend.

    Args:
        message: the user's chat message for this turn.
        history: chat history supplied by gr.ChatInterface (unused here,
            but required by the callback signature).
        state: per-session dict; mutated with the current video path.
        video_path: path of the currently selected/uploaded video.
        mode: 'Real-Time Commentary' or 'Conversation'.

    Returns:
        (response, state) from the backend call.
    """
    # The original declared `global gradio_backend`, but the name is only
    # read and called — never rebound — so the declaration was redundant.
    gradio_backend.init_model(device='cuda')
    state['video_path'] = video_path
    return gradio_backend(query=message, state=state, mode=mode)
79
+
80
def gr_chatinterface_chatbot_clear_fn():
    """Reset handler for the chatbot: fresh state dicts and zeroed counters."""
    fresh_state = {}
    fresh_media_state = {}
    return fresh_state, fresh_media_state, 0, 0
82
  gr_chatinterface = gr.ChatInterface(