# NOTE: removed web-scrape artifacts (HF Spaces status lines, file size, commit hash, line-number gutter) that were not part of the program.
import argparse
import os

# Directory containing this file; generated artifacts (extracted audio,
# srt files, dubbed wavs) are addressed relative to it so the app works
# regardless of the current working directory.
ROOT_DIR = os.path.dirname(os.path.abspath(__file__))

# ffmpeg_path = f"{ROOT_DIR}/bin"  # point this at your FFmpeg bin directory if ffmpeg is not on PATH
# os.environ["PATH"] = os.environ.get("PATH", "") + os.pathsep + ffmpeg_path

import gradio as gr
from utils import (
    movie2audio,
    make_srt,
    make_tran,
    merge_sub,
    make_tran_zh2en,
    make_tran_ja2zh,
    make_tran_ko2zh,
    make_srt_sv,
    make_tran_qwen2,
    make_tran_deep,
)
from subtitle_to_audio import generate_audio
import pyttsx3

# Probe the locally installed TTS voices once at startup. The UI dropdown
# consumes vlist as (voice-name, index) pairs so a selection maps back to
# a pyttsx3 voice index.
engine = pyttsx3.init()
voices = engine.getProperty('voices')  # details of the voices installed on this machine

vlist = []
for num, voice in enumerate(voices):  # enumerate replaces the manual counter
    print(" - Name: %s" % voice.name)
    vlist.append((voice.name, num))

initial_md = """
项目地址:https://github.com/v3ucn/Modelscope_Faster_Whisper_Multi_Subtitle
作者:刘悦的技术博客 https://space.bilibili.com/3031494
"""
def do_pyttsx3(srt, speed, voice):
    """Dub the given SRT file with the selected pyttsx3 voice.

    Returns the path of the generated wav (named after the voice).
    """
    print(srt, speed, voice)
    idx = int(voice)
    rate = int(speed)
    generate_audio(path=srt, rate=rate, voice_idx=idx)
    return f"output/{vlist[idx][0]}.wav"
def do_speech(video):
    """Extract the vocal track from the uploaded video via utils.movie2audio."""
    return movie2audio(video)
def do_trans_video(model_type, video_path):
    """Transcribe the video straight to SRT text with the chosen faster-whisper model."""
    return make_srt(video_path, model_type)
def do_trans_video_sv(video_path):
    """Transcribe the video to SRT text with the SenseVoice backend (utils.make_srt_sv)."""
    return make_srt_sv(video_path)
def do_trans_audio(model_type):
    """Transcribe the previously extracted vocal track (audio.wav in the project root)."""
    return make_srt(f'{ROOT_DIR}/audio.wav', model_type)
def do_trans_en2zh(srt_path):
    """Translate an English SRT to Chinese via utils.make_tran."""
    return make_tran(srt_path)
def do_trans_en2zh_deep(srt_path):
    """DeepL translation: English subtitles -> Chinese."""
    source, target = "EN", "ZH"
    return make_tran_deep(srt_path, source, target)


def do_trans_zh2en_deep(srt_path):
    """DeepL translation: Chinese subtitles -> English."""
    source, target = "ZH", "EN"
    return make_tran_deep(srt_path, source, target)


def do_trans_zh2ja_deep(srt_path):
    """DeepL translation: Chinese subtitles -> Japanese."""
    source, target = "ZH", "JA"
    return make_tran_deep(srt_path, source, target)


def do_trans_zh2ko_deep(srt_path):
    """DeepL translation: Chinese subtitles -> Korean."""
    source, target = "ZH", "KO"
    return make_tran_deep(srt_path, source, target)


def do_trans_ja2zh_deep(srt_path):
    """DeepL translation: Japanese subtitles -> Chinese."""
    source, target = "JA", "ZH"
    return make_tran_deep(srt_path, source, target)


def do_trans_ko2zh_deep(srt_path):
    """DeepL translation: Korean subtitles -> Chinese."""
    source, target = "KO", "ZH"
    return make_tran_deep(srt_path, source, target)
def do_trans_en2zh_qwen2(model_path_qwen2, srt_path):
    """Qwen2 (via ollama): translate English subtitles into Chinese."""
    target_lang = "zh"
    return make_tran_qwen2(model_path_qwen2, srt_path, target_lang)


def do_trans_zh2en_qwen2(model_path_qwen2, srt_path):
    """Qwen2 (via ollama): translate Chinese subtitles into English."""
    target_lang = "en"
    return make_tran_qwen2(model_path_qwen2, srt_path, target_lang)


def do_trans_ja2zh_qwen2(model_path_qwen2, srt_path):
    """Qwen2 (via ollama): translate Japanese subtitles into Chinese."""
    target_lang = "zh"
    return make_tran_qwen2(model_path_qwen2, srt_path, target_lang)


def do_trans_ko2zh_qwen2(model_path_qwen2, srt_path):
    """Qwen2 (via ollama): translate Korean subtitles into Chinese."""
    target_lang = "zh"
    return make_tran_qwen2(model_path_qwen2, srt_path, target_lang)
def do_trans_zh2en(srt_path):
    """Translate Chinese subtitles to English (utils.make_tran_zh2en)."""
    return make_tran_zh2en(srt_path)


def do_trans_ja2zh(srt_path):
    """Translate Japanese subtitles to Chinese (utils.make_tran_ja2zh)."""
    return make_tran_ja2zh(srt_path)


def do_trans_ko2zh(srt_path):
    """Translate Korean subtitles to Chinese (utils.make_tran_ko2zh)."""
    return make_tran_ko2zh(srt_path)
def do_srt_sin(video_path):
    """Merge the monolingual subtitle (output/video.srt) into the video."""
    return merge_sub(video_path, f"{ROOT_DIR}/output/video.srt")


def do_srt_two(video_path):
    """Merge the bilingual subtitle (output/two.srt) into the video."""
    return merge_sub(video_path, f"{ROOT_DIR}/output/two.srt")


def do_srt_two_single(video_path):
    """Merge the translated-only subtitle (output/two_single.srt) into the video."""
    return merge_sub(video_path, f"{ROOT_DIR}/output/two_single.srt")
def save_srt(text):
    """Persist the edited subtitle text to output/video.srt (UTF-8) and notify the user."""
    target = rf'{ROOT_DIR}/output/video.srt'
    with open(target, 'w', encoding='utf-8') as fh:
        fh.write(text + "\n")
    gr.Info('字幕文件修改成功,字幕保存在output目录')
def save_two(text, text_2):
    """Persist the bilingual (two.srt) and translated-only (two_single.srt) subtitle edits."""
    targets = (
        (rf'{ROOT_DIR}/output/two.srt', text),
        (rf'{ROOT_DIR}/output/two_single.srt', text_2),
    )
    for path, content in targets:
        with open(path, 'w', encoding='utf-8') as fh:
            fh.write(content + "\n")
    gr.Info('字幕文件修改成功,字幕保存在output目录')
# UI definition. Fixes applied:
#  - BUG: the "merge translated monolingual subtitle" button
#    (srt_button_two_single) was never wired; srt_button_two was bound twice,
#    once to do_srt_two and once to do_srt_two_single. Now each button has
#    its own handler.
#  - The output Video component was named `result3`, shadowing the qwen2
#    result Textbox of the same name; renamed to `result_video` (local only).
#  - The voice dropdown default of 3 crashed conceptually on machines with
#    fewer than 4 voices; it now falls back to the first voice.
with gr.Blocks() as app:
    gr.Markdown(initial_md)

    with gr.Accordion("视频处理(Video)"):
        with gr.Row():
            ori_video = gr.Video(label="请上传视频(Upload Video)")
            speech_button = gr.Button("提取人声(如果视频没有背景音也可以不做)Extract human voice (you don't have to do it if the video has no background sound)")
            speech_audio = gr.Audio(label="提取的人声(Extract voice)")
        speech_button.click(do_speech, inputs=[ori_video], outputs=[speech_audio])

    with gr.Accordion("转写字幕"):
        with gr.Row():
            with gr.Column():
                # model_type = gr.Dropdown(choices=["small","medium","large-v3","large-v2"], value="small", label="选择faster_Whisper模型/Select faster_Whisper model",interactive=True)
                model_type = gr.Textbox(label="填写faster_Whisper模型/Fill in the faster_Whisper model,也可以填写small,medium,large,large-v2,large-v3,faster-whisper-large-v3-turbo-ct2,模型越大,速度越慢,但字幕的准确度越高,酌情填写,用文本框是因为你可以填写其他huggingface上的开源模型地址", value="faster-whisper-large-v3-turbo-ct2")
        # with gr.Row():
        #     with gr.Column():
        #         language = gr.Dropdown(["ja", "en", "zh","ko","yue"], value="zh", label="选择转写的语言",interactive=True)
        with gr.Row():
            transcribe_button_whisper = gr.Button("Whisper视频直接转写字幕(Video direct rewriting subtitles)")
            transcribe_button_audio = gr.Button("Whisper提取人声转写字幕(Extract voice transliteration subtitles)")
            # transcribe_button_video_sv = gr.Button("阿里SenseVoice视频直接转写字幕")
            result1 = gr.Textbox(label="字幕結果(会在项目目录生成video.srt/video.srt is generated in the current directory)", value=" ", interactive=True)
            transcribe_button_audio_save = gr.Button("保存字幕修改结果")
        transcribe_button_whisper.click(do_trans_video, inputs=[model_type, ori_video], outputs=[result1])
        transcribe_button_audio_save.click(save_srt, inputs=[result1], outputs=[])
        # transcribe_button_video_sv.click(do_trans_video_sv,inputs=[ori_video],outputs=[result1])
        transcribe_button_audio.click(do_trans_audio, inputs=[model_type], outputs=[result1])

    # with gr.Accordion("HuggingFace大模型字幕翻译"):
    #     with gr.Row():
    #         srt_path = gr.Textbox(label="原始字幕地址,默认为项目目录中的video.srt,也可以输入其他路径",value="./video.srt")
    #         trans_button_en2zh = gr.Button("翻译英语字幕为中文/Translate English subtitles into Chinese")
    #         trans_button_zh2en = gr.Button("翻译中文字幕为英文/Translate Chinese subtitles into English")
    #         trans_button_ja2zh = gr.Button("翻译日文字幕为中文/Translate Japanese subtitles into Chinese")
    #         trans_button_ko2zh = gr.Button("翻译韩文字幕为中文/Translate Korea subtitles into Chinese")
    #         result2 = gr.Textbox(label="翻译结果(会在项目目录生成two.srt/two.srt is generated in the current directory)")
    #     trans_button_en2zh.click(do_trans_en2zh,[srt_path],outputs=[result2])
    #     trans_button_zh2en.click(do_trans_zh2en,[srt_path],outputs=[result2])
    #     trans_button_ja2zh.click(do_trans_ja2zh,[srt_path],outputs=[result2])
    #     trans_button_ko2zh.click(do_trans_ko2zh,[srt_path],outputs=[result2])

    with gr.Accordion("Qwen2大模型字幕翻译"):
        with gr.Row():
            srt_path_qwen2 = gr.Textbox(label="原始字幕地址,默认为项目目录中的output/video.srt,也可以输入其他路径", value=f"{ROOT_DIR}/output/video.srt")
            model_path_qwen2 = gr.Textbox(label="ollama中模型名称", value="qwen2:7b")
            trans_button_en2zh_qwen2 = gr.Button("翻译英语字幕为中文/Translate English subtitles into Chinese")
            trans_button_zh2en_qwen2 = gr.Button("翻译中文字幕为英文/Translate Chinese subtitles into English")
            trans_button_ja2zh_qwen2 = gr.Button("翻译日文字幕为中文/Translate Japanese subtitles into Chinese")
            trans_button_ko2zh_qwen2 = gr.Button("翻译韩文字幕为中文/Translate Korea subtitles into Chinese")
        with gr.Row():
            result2 = gr.Textbox(label="翻译结果(会在项目目录生成two.srt/two.srt is generated in the current directory)", value=" ", interactive=True)
            result3 = gr.Textbox(label="翻译结果(会在项目目录生成two_single.srt)", value=" ", interactive=True)
            trans_button_ko2zh_qwen2_save = gr.Button("保存修改结果")
        trans_button_en2zh_qwen2.click(do_trans_en2zh_qwen2, [model_path_qwen2, srt_path_qwen2], outputs=[result2, result3])
        trans_button_zh2en_qwen2.click(do_trans_zh2en_qwen2, [model_path_qwen2, srt_path_qwen2], outputs=[result2, result3])
        trans_button_ja2zh_qwen2.click(do_trans_ja2zh_qwen2, [model_path_qwen2, srt_path_qwen2], outputs=[result2, result3])
        trans_button_ko2zh_qwen2.click(do_trans_ko2zh_qwen2, [model_path_qwen2, srt_path_qwen2], outputs=[result2, result3])
        trans_button_ko2zh_qwen2_save.click(save_two, [result2, result3], outputs=[])

    with gr.Accordion("Deepl字幕翻译"):
        with gr.Row():
            srt_path_deep = gr.Textbox(label="原始字幕地址,默认为项目目录中的output/video.srt,也可以输入其他路径", value=f"{ROOT_DIR}/output/video.srt")
            trans_button_en2zh_deep = gr.Button("翻译英语字幕为中文/Translate English subtitles into Chinese")
            trans_button_zh2en_deep = gr.Button("翻译中文字幕为英文/Translate Chinese subtitles into English")
            trans_button_zh2ja_deep = gr.Button("翻译中文字幕为日文/Translate Chinese subtitles into Japanese")
            trans_button_zh2ko_deep = gr.Button("翻译中文字幕为韩文/Translate Chinese subtitles into Korea")
            trans_button_ja2zh_deep = gr.Button("翻译日文字幕为中文/Translate Japanese subtitles into Chinese")
            trans_button_ko2zh_deep = gr.Button("翻译韩文字幕为中文/Translate Korea subtitles into Chinese")
        with gr.Row():
            result2_deep = gr.Textbox(label="翻译结果(会在项目目录生成two.srt/two.srt is generated in the current directory)", value=" ", interactive=True)
            result3_deep = gr.Textbox(label="翻译结果(会在项目目录生成two_single.srt)", value=" ", interactive=True)
            trans_button_ko2zh_deep_save = gr.Button("保存修改结果")
        trans_button_ko2zh_deep_save.click(save_two, [result2_deep, result3_deep], outputs=[])

    with gr.Accordion("字幕配音(pyttsx3)"):
        with gr.Row():
            srt_path_pyttsx3 = gr.Textbox(label="字幕地址,也可以输入其他路径", value=f"{ROOT_DIR}/output/eng.srt", interactive=True)
            speed_pyttsx3 = gr.Textbox(label="配音语速(很重要,否则会引起时间轴错乱的问题)", value="240")
            # Fall back to the first voice when fewer than 4 voices are installed.
            voice_pyttsx3 = gr.Dropdown(choices=vlist, value=3 if len(vlist) > 3 else 0, label="配音的音色选择", interactive=True)
            button_pyttsx3 = gr.Button("生成配音")
            pyttsx3_audio = gr.Audio(label="配音的结果")
        # The DeepL handlers also update srt_path_pyttsx3, so they are bound
        # here, after that component exists.
        trans_button_en2zh_deep.click(do_trans_en2zh_deep, [srt_path_deep], outputs=[result2_deep, result3_deep, srt_path_pyttsx3])
        trans_button_zh2ja_deep.click(do_trans_zh2ja_deep, [srt_path_deep], outputs=[result2_deep, result3_deep, srt_path_pyttsx3])
        trans_button_zh2en_deep.click(do_trans_zh2en_deep, [srt_path_deep], outputs=[result2_deep, result3_deep, srt_path_pyttsx3])
        trans_button_zh2ko_deep.click(do_trans_zh2ko_deep, [srt_path_deep], outputs=[result2_deep, result3_deep, srt_path_pyttsx3])
        trans_button_ja2zh_deep.click(do_trans_ja2zh_deep, [srt_path_deep], outputs=[result2_deep, result3_deep, srt_path_pyttsx3])
        trans_button_ko2zh_deep.click(do_trans_ko2zh_deep, [srt_path_deep], outputs=[result2_deep, result3_deep, srt_path_pyttsx3])
        button_pyttsx3.click(do_pyttsx3, inputs=[srt_path_pyttsx3, speed_pyttsx3, voice_pyttsx3], outputs=[pyttsx3_audio])

    with gr.Accordion("字幕合并"):
        with gr.Row():
            srt_button_sin = gr.Button("将单语字幕合并到视频/Merge monolingual subtitles into video")
            srt_button_two = gr.Button("将双语字幕合并到视频/Merge bilingual subtitles into video")
            srt_button_two_single = gr.Button("将翻译的单语字幕合并到视频")
            result_video = gr.Video(label="带字幕视频")
        srt_button_sin.click(do_srt_sin, inputs=[ori_video], outputs=[result_video])
        srt_button_two.click(do_srt_two, inputs=[ori_video], outputs=[result_video])
        # FIX: was srt_button_two.click(do_srt_two_single, ...), which double-bound
        # the bilingual button and left this button dead.
        srt_button_two_single.click(do_srt_two_single, inputs=[ori_video], outputs=[result_video])
# CLI flags and app launch.
parser = argparse.ArgumentParser()
parser.add_argument(
    "--server-name",
    type=str,
    default=None,
    help="Server name for Gradio app",
)
parser.add_argument(
    "--no-autolaunch",
    action="store_true",
    default=False,
    help="Do not launch app automatically",
)
args = parser.parse_args()

app.queue()
# FIX: --no-autolaunch was parsed but ignored (inbrowser was hard-coded to
# True). Default behavior is unchanged: without the flag the browser opens.
app.launch(inbrowser=not args.no_autolaunch, server_name=args.server_name)