lym0302 committed on
Commit
4083b70
·
1 Parent(s): 77dc150
Files changed (1) hide show
  1. pipeline/step02.py +16 -16
pipeline/step02.py CHANGED
@@ -16,14 +16,14 @@ class Step02:
16
 
17
  def run_step0(self, video_path, modal_type='v'):
18
  question = f"Generate high-quality audio from video step-by-step."
19
- if modal_type == "a":
20
- self.model.model.vision_tower = None
21
- elif modal_type == "v":
22
- self.model.model.audio_tower = None
23
- elif modal_type == "av":
24
- pass
25
- else:
26
- raise NotImplementedError
27
 
28
  self.log.info("######################################################################################################")
29
  self.log.info("Generate high-quality audio from video step-by-step...")
@@ -42,14 +42,14 @@ class Step02:
42
 
43
  def run_step2(self, video_audio_path, modal_type='av'):
44
  question = f"Given a video and its corresponding audio, determine whether the audio contains voice-over? Options: A. Yes, B. No. Choose A or B."
45
- if modal_type == "a":
46
- self.model.model.vision_tower = None
47
- elif modal_type == "v":
48
- self.model.model.audio_tower = None
49
- elif modal_type == "av":
50
- pass
51
- else:
52
- raise NotImplementedError
53
  audio_video_tensor = self.preprocess(video_audio_path, va=True)
54
  output = mm_infer(
55
  audio_video_tensor,
 
16
 
17
  def run_step0(self, video_path, modal_type='v'):
18
  question = f"Generate high-quality audio from video step-by-step."
19
+ # if modal_type == "a":
20
+ # self.model.model.vision_tower = None
21
+ # elif modal_type == "v":
22
+ # self.model.model.audio_tower = None
23
+ # elif modal_type == "av":
24
+ # pass
25
+ # else:
26
+ # raise NotImplementedError
27
 
28
  self.log.info("######################################################################################################")
29
  self.log.info("Generate high-quality audio from video step-by-step...")
 
42
 
43
  def run_step2(self, video_audio_path, modal_type='av'):
44
  question = f"Given a video and its corresponding audio, determine whether the audio contains voice-over? Options: A. Yes, B. No. Choose A or B."
45
+ # if modal_type == "a":
46
+ # self.model.model.vision_tower = None
47
+ # elif modal_type == "v":
48
+ # self.model.model.audio_tower = None
49
+ # elif modal_type == "av":
50
+ # pass
51
+ # else:
52
+ # raise NotImplementedError
53
  audio_video_tensor = self.preprocess(video_audio_path, va=True)
54
  output = mm_infer(
55
  audio_video_tensor,