chenjoya committed on
Commit
292389d
·
verified ·
1 Parent(s): ea5bc09

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +28 -16
app.py CHANGED
@@ -1,4 +1,11 @@
1
- import spaces, os
 
 
 
 
 
 
 
2
  import gradio as gr
3
 
4
  from kokoro import KPipeline
@@ -18,8 +25,8 @@ class GradioBackend:
18
  def __call__(self, message: str = None, history: list[str] = None, state: dict = {}, mode: str = 'Real-Time Commentary', **kwargs):
19
  return getattr(self.infer, self.mode2api[mode])(message=message, history=history, state=state, **kwargs)
20
 
21
- gradio_backend = None
22
-
23
  with gr.Blocks() as demo:
24
  gr.Markdown("## LiveCC Conversation and Real-Time Commentary - Gradio Demo")
25
  gr.Markdown("### [LiveCC: Learning Video LLM with Streaming Speech Transcription at Scale (CVPR 2025)](https://showlab.github.io/livecc/)")
@@ -40,7 +47,6 @@ with gr.Blocks() as demo:
40
  visible=True,
41
  sources=['upload'],
42
  autoplay=True,
43
- include_audio=False,
44
  width=720,
45
  height=480
46
  )
@@ -57,34 +63,37 @@ with gr.Blocks() as demo:
57
  with gr.Row():
58
  gr_radio_mode = gr.Radio(label="Select Mode", choices=["Real-Time Commentary", "Conversation"], elem_id="gr_radio_mode", value='Real-Time Commentary', interactive=True)
59
 
60
- @spaces.GPU
61
  def gr_chatinterface_fn(message, history, state, video_path, mode):
62
  global gradio_backend
63
- yield '(initializing model, thanks for waiting...)'
64
  if gradio_backend is None:
 
65
  gradio_backend = GradioBackend()
 
66
  state['video_path'] = video_path
67
- yield '(finished initialization, responding...)'
68
  if mode != 'Conversation':
69
  yield 'waiting video input...'
70
- response, state = gradio_backend(message=message, history=history, state=state, mode=mode)
71
- yield response
72
 
73
  def gr_chatinterface_chatbot_clear_fn():
74
  return {}, {}, 0, 0
75
  gr_chatinterface = gr.ChatInterface(
76
  fn=gr_chatinterface_fn,
77
  type="messages",
78
- additional_inputs=[gr_state, gr_video, gr_radio_mode]
 
79
  )
80
  gr_chatinterface.chatbot.clear(fn=gr_chatinterface_chatbot_clear_fn, outputs=[gr_video_state, gr_state, gr_static_trigger, gr_dynamic_trigger])
81
  gr_clean_button.click(fn=lambda :[[], *gr_chatinterface_chatbot_clear_fn()], outputs=[gr_video_state, gr_state, gr_static_trigger, gr_dynamic_trigger])
82
 
83
  def gr_for_streaming(history: list[gr.ChatMessage], video_state: dict, state: dict, mode: str, static_trigger: int, dynamic_trigger: int):
84
- # if static_trigger == 0:
85
- # return gr_chatinterface_chatbot_clear_fn()
86
- # if video_state['video_path'] != state.get('video_path', None):
87
- # return gr_chatinterface_chatbot_clear_fn()
 
 
88
  state.update(video_state)
89
  query, assistant_waiting_message = None, None
90
  for message in history[::-1]:
@@ -100,7 +109,7 @@ with gr.Blocks() as demo:
100
  elif message['content'] == GradioBackend.waiting_video_response:
101
  assistant_waiting_message = message
102
 
103
- for (start_timestamp, stop_timestamp), response, state in gradio_backend(query=query, state=state, mode=mode):
104
  if start_timestamp >= 0:
105
  response_with_timestamp = f'{start_timestamp:.1f}s-{stop_timestamp:.1f}s: {response}'
106
  if assistant_waiting_message is None:
@@ -109,7 +118,10 @@ with gr.Blocks() as demo:
109
  assistant_waiting_message['content'] = response_with_timestamp
110
  assistant_waiting_message = None
111
  yield history, state, dynamic_trigger
112
- yield history, state, 1 - dynamic_trigger
 
 
 
113
 
114
  js_video_timestamp_fetcher = """
115
  (state, video_state) => {
 
1
+ hf_spaces = False
2
+ js_monitor = False # if False, will not care about the actual video timestamp in front end. Suitable for environments with unsolvable latency (e.g. hf spaces)
3
+ if hf_spaces:
4
+ try:
5
+ import spaces
6
+ except Exception as e:
7
+ print(e)
8
+ import os
9
  import gradio as gr
10
 
11
  from kokoro import KPipeline
 
25
  def __call__(self, message: str = None, history: list[str] = None, state: dict = {}, mode: str = 'Real-Time Commentary', **kwargs):
26
  return getattr(self.infer, self.mode2api[mode])(message=message, history=history, state=state, **kwargs)
27
 
28
+ gradio_backend = None if hf_spaces else GradioBackend()
29
+
30
  with gr.Blocks() as demo:
31
  gr.Markdown("## LiveCC Conversation and Real-Time Commentary - Gradio Demo")
32
  gr.Markdown("### [LiveCC: Learning Video LLM with Streaming Speech Transcription at Scale (CVPR 2025)](https://showlab.github.io/livecc/)")
 
47
  visible=True,
48
  sources=['upload'],
49
  autoplay=True,
 
50
  width=720,
51
  height=480
52
  )
 
63
  with gr.Row():
64
  gr_radio_mode = gr.Radio(label="Select Mode", choices=["Real-Time Commentary", "Conversation"], elem_id="gr_radio_mode", value='Real-Time Commentary', interactive=True)
65
 
66
+ # @spaces.GPU
67
  def gr_chatinterface_fn(message, history, state, video_path, mode):
68
  global gradio_backend
 
69
  if gradio_backend is None:
70
+ yield '(ZeroGPU needs to initialize model under @spaces.GPU, thanks for waiting...)', state
71
  gradio_backend = GradioBackend()
72
+ yield '(finished initialization, responding...)', state
73
  state['video_path'] = video_path
 
74
  if mode != 'Conversation':
75
  yield 'waiting video input...'
76
+ response, state = gradio_backend(message=message, history=history, state=state, mode=mode, hf_spaces=hf_spaces)
77
+ yield response, state
78
 
79
  def gr_chatinterface_chatbot_clear_fn():
80
  return {}, {}, 0, 0
81
  gr_chatinterface = gr.ChatInterface(
82
  fn=gr_chatinterface_fn,
83
  type="messages",
84
+ additional_inputs=[gr_state, gr_video, gr_radio_mode],
85
+ additional_outputs=[gr_state]
86
  )
87
  gr_chatinterface.chatbot.clear(fn=gr_chatinterface_chatbot_clear_fn, outputs=[gr_video_state, gr_state, gr_static_trigger, gr_dynamic_trigger])
88
  gr_clean_button.click(fn=lambda :[[], *gr_chatinterface_chatbot_clear_fn()], outputs=[gr_video_state, gr_state, gr_static_trigger, gr_dynamic_trigger])
89
 
90
  def gr_for_streaming(history: list[gr.ChatMessage], video_state: dict, state: dict, mode: str, static_trigger: int, dynamic_trigger: int):
91
+ if static_trigger == 0:
92
+ yield [], {}, dynamic_trigger
93
+ return
94
+ yield history + [gr.ChatMessage(role="assistant", content='Loading video... thanks for waiting...')], state, dynamic_trigger
95
+ if not js_monitor:
96
+ video_state['video_timestamp'] = 19260817 # 👓
97
  state.update(video_state)
98
  query, assistant_waiting_message = None, None
99
  for message in history[::-1]:
 
109
  elif message['content'] == GradioBackend.waiting_video_response:
110
  assistant_waiting_message = message
111
 
112
+ for (start_timestamp, stop_timestamp), response, state in gradio_backend(query=query, state=state, mode=mode, hf_spaces=hf_spaces):
113
  if start_timestamp >= 0:
114
  response_with_timestamp = f'{start_timestamp:.1f}s-{stop_timestamp:.1f}s: {response}'
115
  if assistant_waiting_message is None:
 
118
  assistant_waiting_message['content'] = response_with_timestamp
119
  assistant_waiting_message = None
120
  yield history, state, dynamic_trigger
121
+ if js_monitor:
122
+ yield history, state, 1 - dynamic_trigger
123
+ else:
124
+ yield history, state, dynamic_trigger
125
 
126
  js_video_timestamp_fetcher = """
127
  (state, video_state) => {