Spaces:
Running
Running
File size: 9,278 Bytes
1fd4e9c 2e6c958 1fd4e9c 2e6c958 1fd4e9c 2e6c958 1fd4e9c 2e6c958 1fd4e9c 2e6c958 1fd4e9c 2e6c958 1fd4e9c 2e6c958 1fd4e9c d3e19f4 1fd4e9c d3e19f4 1fd4e9c |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 |
# coding=utf-8
# from .step0 import Step0
from .step1 import Step1
# from .step2 import Step2
from .step3 import Step3
from .step4 import Step4
from .step02 import Step02
import logging
import re
import os
class Pipeline:
    """Step-by-step video-to-audio pipeline driven by a textual plan.

    ``Step02.run_step0`` returns a plan made of ``"StepN: ..."`` entries.  The
    title (first line) of each entry is matched against the four step titles
    below and the matching step is executed, in plan order:

    * Step1 - ``Step1.run`` generates an audio path and a video path.
    * Step2 - ``Step02.run_step2`` reports whether the Step1 result contains
      voice-over; if it does not, the Step1 result becomes the final result.
    * Step3 - ``Step3.run`` produces a new audio path from the Step1 audio.
    * Step4 - ``Step4.run`` reports whether the Step3 audio is silent; if it
      is, a post-processing mode chooses the final result instead.

    Results are collected in a dict with the keys ``temp_final_audio_path``,
    ``temp_final_video_path`` and, once the corresponding step has run,
    ``step1_audio_path``, ``step1_video_path``, ``is_vo``,
    ``step3_audio_path`` and ``is_silent``.
    """

    # Splits the plan into "StepN: ..." chunks; each chunk extends up to the
    # next "StepN:" marker or the end of the text.
    _STEP_PATTERN = re.compile(r'(Step\d:.*?)(?=Step\d:|$)', re.DOTALL)

    # Step titles exactly as they are compared against the plan.
    _STEP1 = 'Step1: Generate audio from video.'
    _STEP2 = ('Step2: Given a video and its generated audio, '
              'determine whether the audio contains voice-over.')
    _STEP3 = 'Step3: Remove voice-over from audio.'
    _STEP4 = 'Step4: Determine whether the audio is silent.'

    # Result key each step reads; an earlier step must have produced it.
    _STEP_REQUIRES = {
        _STEP2: "step1_video_path",
        _STEP3: "step1_audio_path",
        _STEP4: "step3_audio_path",
    }

    # Progress messages that consumers of ``run_for_gradio`` may match on.
    _ERROR_LOG = "Step-by-Step Error !!!!!!!!!"
    _FINISH_LOG = "Finish step-by-step v2a."

    def __init__(self, step0_model_dir, step1_mode, step2_model_dir, step2_mode, step3_mode):
        """Build the step objects.

        Args:
            step0_model_dir: Passed to ``Step02`` as its first argument.
            step1_mode: Passed to ``Step1``.
            step2_model_dir: Accepted for interface compatibility; unused,
                because Step0 and Step2 are both served by ``Step02``.
            step2_mode: Passed to ``Step02`` as its second argument.
            step3_mode: Passed to ``Step3`` as ``model_type``.
        """
        self.step02 = Step02(step0_model_dir, step2_mode)
        self.step1 = Step1(step1_mode)
        self.step3 = Step3(model_type=step3_mode)
        self.step4 = Step4()
        self.log = logging.getLogger(self.__class__.__name__)
        self.log.setLevel(logging.INFO)

    def run(self, video_input, output_dir, mode='s4', postp_mode='rep', prompt='', negative_prompt='', duration=10, seed=42):
        """Run the planned steps to completion and return the result dict.

        Args:
            video_input: Input video, forwarded to the planner and to Step1.
            output_dir: Output directory forwarded to Step1 and Step3; Step3
                also receives ``<output_dir>/remove_vo`` as its temp dir.
            mode: ``'s3'`` stops after Step3 and uses its audio as the final
                audio; any other value continues with the remaining steps.
            postp_mode: What to do when Step4 reports silence: ``'rm'`` (no
                final audio), ``'rep'`` (reuse the Step1 result) or ``'neg'``
                (re-run Step1 with the negative prompt ``'human voice'``).
            prompt: Prompt forwarded to Step1.
            negative_prompt: Negative prompt forwarded to the first Step1 run.
            duration: Forwarded to Step1.
            seed: Forwarded to Step1.

        Returns:
            The result dict described on the class.  Both ``temp_final_*``
            entries stay ``None`` when the plan is unusable (unknown step,
            step listed before its input exists, or no Step4 in the plan).
        """
        step_results = {"temp_final_audio_path": None, "temp_final_video_path": None}
        progress = self._iter_progress(step_results, video_input, output_dir, mode,
                                       postp_mode, prompt, negative_prompt, duration, seed)
        for _ in progress:
            pass  # progress messages are only needed by run_for_gradio
        return step_results

    def run_for_gradio(self, video_input, output_dir, mode='s4', postp_mode='rep', prompt='', negative_prompt='', duration=10, seed=42):
        """Generator variant of ``run`` that reports progress.

        Takes the same arguments as ``run``.  After every stage it yields the
        (same, mutated in place) result dict with the progress text stored
        under ``'log'``.  The last yield always carries
        ``"Finish step-by-step v2a."``; the generator stops right after it, so
        no further step runs once the final result has been decided.
        """
        step_results = {"temp_final_audio_path": None,
                        "temp_final_video_path": None,
                        'log': ''}
        progress = self._iter_progress(step_results, video_input, output_dir, mode,
                                       postp_mode, prompt, negative_prompt, duration, seed)
        for message in progress:
            step_results['log'] = message
            yield step_results

    def _plan_steps(self, video_input):
        """Query the planner and return the step titles in plan order."""
        step0_resp = self.step02.run_step0(video_input)
        chunks = self._STEP_PATTERN.findall(step0_resp)
        # Only the first line of each chunk is the title used for dispatch.
        return [chunk.strip().split("\n")[0] for chunk in chunks]

    def _iter_progress(self, step_results, video_input, output_dir, mode,
                       postp_mode, prompt, negative_prompt, duration, seed):
        """Execute the plan, mutating *step_results*; yield one message per stage."""
        step_infos = self._plan_steps(video_input)
        step3_temp_dir = os.path.join(output_dir, "remove_vo")

        for step_info in step_infos:
            self.log.info("Start to %s", step_info)
            yield f"Start to {step_info}"

            # A step listed before the step that feeds it cannot run; report it
            # as a plan error instead of failing with a KeyError.
            required_key = self._STEP_REQUIRES.get(step_info)
            if required_key is not None and required_key not in step_results:
                self.log.error("Cannot run %r: %r has not been produced by an earlier step.",
                               step_info, required_key)
                yield self._ERROR_LOG
                yield self._FINISH_LOG
                return

            if step_info == self._STEP1:
                step1_audio_path, step1_video_path = self.step1.run(
                    video_input, output_dir, prompt, negative_prompt,
                    duration=duration, seed=seed)
                step_results["step1_audio_path"] = step1_audio_path
                step_results["step1_video_path"] = step1_video_path
                yield "Step1 completed."

            elif step_info == self._STEP2:
                is_vo = self.step02.run_step2(str(step_results["step1_video_path"]))
                step_results["is_vo"] = is_vo
                yield f"Step2 completed. Contain voice-over? {'Yes' if is_vo else 'No'}"
                if not is_vo:
                    # No voice-over: the Step1 output is already the final result.
                    step_results["temp_final_audio_path"] = step_results["step1_audio_path"]
                    step_results["temp_final_video_path"] = step_results["step1_video_path"]
                    yield self._FINISH_LOG
                    return

            elif step_info == self._STEP3:
                step3_audio_path = self.step3.run(input_audio_path=step_results["step1_audio_path"],
                                                  temp_store_dir=step3_temp_dir,
                                                  output_dir=output_dir)
                step_results["step3_audio_path"] = step3_audio_path
                yield "Step3 completed."
                if mode == 's3':
                    # 's3' mode ends here and skips the silence check.
                    step_results["temp_final_audio_path"] = step_results["step3_audio_path"]
                    yield self._FINISH_LOG
                    return

            elif step_info == self._STEP4:
                is_silent = self.step4.run(step_results["step3_audio_path"])
                step_results["is_silent"] = is_silent
                yield f"Step4 completed. Silent? {'Yes' if is_silent else 'No'}"

            else:
                self.log.error(self._ERROR_LOG)
                yield self._ERROR_LOG
                yield self._FINISH_LOG
                return

        if "is_silent" not in step_results:
            # Empty plan, or a plan without Step4: there is nothing to base the
            # final selection on, so leave the final paths untouched.
            self.log.error("The plan finished without running Step4; no final audio was selected.")
            yield self._ERROR_LOG
            yield self._FINISH_LOG
            return

        if not step_results["is_silent"]:
            step_results["temp_final_audio_path"] = step_results["step3_audio_path"]
            yield self._FINISH_LOG
            return

        yield f"Post-processing with mode: {postp_mode}"
        self._post_process(step_results, video_input, output_dir, postp_mode, prompt, duration, seed)
        yield "Post-processing completed."
        yield self._FINISH_LOG

    def _post_process(self, step_results, video_input, output_dir, postp_mode, prompt, duration, seed):
        """Choose the final result for a silent Step3 audio according to *postp_mode*."""
        self.log.info("Start to post process, use mode: %s", postp_mode)
        if postp_mode == "rm":
            step_results["temp_final_audio_path"] = None
        elif postp_mode == "rep":
            step_results["temp_final_audio_path"] = step_results["step1_audio_path"]
            step_results["temp_final_video_path"] = step_results["step1_video_path"]
        elif postp_mode == "neg":
            # The caller's negative prompt is intentionally not used here; the
            # regeneration always uses 'human voice'.
            neg_audio_path, neg_video_path = self.step1.run(
                video_input, output_dir, prompt, negative_prompt='human voice',
                duration=duration, seed=seed, is_postp=True)
            step_results["temp_final_audio_path"] = neg_audio_path
            step_results["temp_final_video_path"] = neg_video_path
        else:
            # Unknown mode: report it and do not claim success below.
            self.log.error("Error postp_mode: %s", postp_mode)
            return

        self.log.info("After post-processing, audio is %s and video is %s",
                      step_results['temp_final_audio_path'],
                      step_results['temp_final_video_path'])
        self.log.info("Finish Post-Process successfully.\n")
|