chenjoya committed
Commit fdcc35d · 1 Parent(s): 5123462
Files changed (2):
1. app.py (+6 -1)
2. demo/infer.py (+2 -8)
app.py CHANGED

```diff
@@ -1,3 +1,4 @@
+import spaces, os
 import gradio as gr
 
 from demo.infer import LiveCCDemoInfer
@@ -9,11 +10,15 @@ class GradioBackend:
         'Real-Time Commentary': 'live_cc',
         'Conversation': 'video_qa'
     }
+
+    @spaces.GPU
     def __init__(self, model_path: str = 'chenjoya/LiveCC-7B-Instruct'):
+        os.system('pip install flash-attn --no-build-isolation')
         self.infer = LiveCCDemoInfer(model_path)
         from kokoro import KPipeline
         self.audio_pipeline = KPipeline(lang_code='a')
-
+
+    @spaces.GPU
     def __call__(self, query: str = None, state: dict = {}, mode: str = 'Real-Time Commentary', **kwargs):
         return getattr(self.infer, self.mode2api[mode])(query=query, state=state, **kwargs)
 
```
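The app.py change puts GPU access behind Hugging Face ZeroGPU's `@spaces.GPU` decorator and installs `flash-attn` at startup. A minimal sketch of the decorator pattern (the function below is an illustrative stand-in, runnable only inside a ZeroGPU Space):

```python
# Minimal sketch of the ZeroGPU pattern adopted above: `@spaces.GPU`
# attaches a GPU to the process only while the decorated call runs.
import spaces
import torch

@spaces.GPU  # request a GPU for the duration of this call
def gpu_name() -> str:
    # CUDA is only guaranteed to be available inside a decorated call
    return torch.cuda.get_device_name(0)
```

The `pip install flash-attn --no-build-isolation` at `__init__` time sidesteps listing `flash-attn` in `requirements.txt`, presumably because its build requires `torch` to already be importable, which `--no-build-isolation` permits.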
demo/infer.py CHANGED

```diff
@@ -5,8 +5,6 @@ from transformers import Qwen2VLForConditionalGeneration, AutoProcessor, LogitsProcessor
 from livecc_utils import prepare_multiturn_multimodal_inputs_for_generation, get_smart_resized_clip, get_smart_resized_video_reader
 from qwen_vl_utils import process_vision_info
 
-import spaces
-
 logger = logging.get_logger(__name__)
 
 class ThresholdLogitsProcessor(LogitsProcessor):
@@ -34,15 +32,12 @@ class LiveCCDemoInfer:
     streaming_time_interval = streaming_fps_frames / fps
     frame_time_interval = 1 / fps
 
-    @spaces.GPU
     def __init__(self, model_path: str = None, device_id: int = 0):
-        os.system('pip install flash-attn --no-build-isolation')
         self.model = Qwen2VLForConditionalGeneration.from_pretrained(
             model_path, torch_dtype="auto",
-            # device_map=f'cuda:{device_id}',
-            # attn_implementation='flash_attention_2'
+            device_map=f'cuda:{device_id}',
+            attn_implementation='flash_attention_2'
         )
-        self.model.to('cuda')
         self.processor = AutoProcessor.from_pretrained(model_path, use_fast=False)
         self.streaming_eos_token_id = self.processor.tokenizer(' ...').input_ids[-1]
         self.model.prepare_inputs_for_generation = functools.partial(prepare_multiturn_multimodal_inputs_for_generation, self.model)
@@ -56,7 +51,6 @@ class LiveCCDemoInfer:
         self.system_prompt_offset = texts.index('<|im_start|>user')
         self._cached_video_readers_with_hw = {}
 
-    @spaces.GPU
     @torch.inference_mode()
     def live_cc(
         self,
```
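The net effect in demo/infer.py is that the `@spaces.GPU` decorators and the runtime flash-attn install move out of the inference class, and the model now loads straight onto the target device with FlashAttention-2 enabled. A self-contained sketch of the resulting load path (model id taken from app.py; assumes `flash-attn` is already installed):

```python
# Minimal sketch of the post-commit loading path in LiveCCDemoInfer.__init__.
from transformers import Qwen2VLForConditionalGeneration, AutoProcessor

model_path = 'chenjoya/LiveCC-7B-Instruct'
model = Qwen2VLForConditionalGeneration.from_pretrained(
    model_path,
    torch_dtype='auto',                       # keep the checkpoint's dtype
    device_map='cuda:0',                      # load weights directly onto GPU 0
    attn_implementation='flash_attention_2',  # requires the flash-attn package
)
processor = AutoProcessor.from_pretrained(model_path, use_fast=False)
```

Since `device_map` already places the weights on the GPU, the previous `self.model.to('cuda')` call became redundant, which is why the commit removes it.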