chenjoya committed on
Commit
ceeb44d
·
verified ·
1 Parent(s): ba2dfcb

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +17 -27
app.py CHANGED
@@ -6,29 +6,16 @@ from demo.infer import LiveCCDemoInfer
6
 
7
  model_path = 'chenjoya/LiveCC-7B-Instruct'
8
 
9
- class GradioBackend:
10
- waiting_video_response = 'Waiting for video input...'
11
- not_found_video_response = 'Video does not exist...'
12
- mode2api = {
13
- 'Real-Time Commentary': 'live_cc',
14
- 'Conversation': 'video_qa'
15
- }
16
 
17
- def __init__(self):
18
- # Delay model loading until we're in a GPU context
19
- self.infer = None
20
- self.audio_pipeline = None
21
-
22
- def init_model(self, device):
23
- # Instantiate inside GPU process
24
- if self.infer is None:
25
- self.infer = LiveCCDemoInfer(model_path, device=device)
26
- self.audio_pipeline = KPipeline(lang_code='a')
27
-
28
- def __call__(self, query: str = None, state: dict = {}, mode: str = 'Real-Time Commentary', **kwargs):
29
- # Called only inside GPU process
30
- response, state = getattr(self.infer, self.mode2api[mode])(query=query, state=state, **kwargs)
31
- return response, state
32
 
33
  with gr.Blocks() as demo:
34
  gr.Markdown("## LiveCC Real-Time Commentary and Conversation - Gradio Demo")
@@ -68,12 +55,15 @@ with gr.Blocks() as demo:
68
  gr_radio_mode = gr.Radio(label="Select Mode", choices=["Real-Time Commentary", "Conversation"], elem_id="gr_radio_mode", value='Real-Time Commentary', interactive=True)
69
 
70
  @spaces.GPU
71
- def gr_chatinterface_fn(message, history, state, video_path, mode):
72
- # Initialize backend and move model to GPU inside this process
73
- global gradio_backend
74
- gradio_backend.init_model(device='cuda')
75
  state['video_path'] = video_path
76
- return gradio_backend(query=message, state=state, mode=mode)
 
 
 
77
  def gr_chatinterface_chatbot_clear_fn():
78
  return {}, {}, 0, 0
79
  gradio_backend = GradioBackend()
 
6
 
7
  model_path = 'chenjoya/LiveCC-7B-Instruct'
8
 
9
+ def _init_infer():
10
+ # create a singleton LiveCCDemoInfer inside GPU
11
+ import torch
12
+ from kokoro import KPipeline
13
+ from demo.infer import LiveCCDemoInfer
14
+ infer = LiveCCDemoInfer(model_path, device='cuda')
15
+ return infer
16
 
17
+ # We'll keep a module-global placeholder
18
+ infer = None
 
 
 
 
 
 
 
 
 
 
 
 
 
19
 
20
  with gr.Blocks() as demo:
21
  gr.Markdown("## LiveCC Real-Time Commentary and Conversation - Gradio Demo")
 
55
  gr_radio_mode = gr.Radio(label="Select Mode", choices=["Real-Time Commentary", "Conversation"], elem_id="gr_radio_mode", value='Real-Time Commentary', interactive=True)
56
 
57
  @spaces.GPU
58
+ def gr_chatinterface_fn(message, state, video_path, mode):
59
+ global infer
60
+ if infer is None:
61
+ infer = _init_infer()
62
  state['video_path'] = video_path
63
+ if mode == 'Conversation':
64
+ return infer.video_qa(query=message, state=state)
65
+ else:
66
+ return 'waiting video input...'
67
  def gr_chatinterface_chatbot_clear_fn():
68
  return {}, {}, 0, 0
69
  gradio_backend = GradioBackend()