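# vc.py: Gradio voice-cloning demo. Take a reference clip (upload, microphone,
# or YouTube audio), optionally trim it, and speak the input text in that voice
# with Coqui TTS (YourTTS).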
import gradio as gr
import torch
from pathlib import Path
from pytube import YouTube
from pydub import AudioSegment
from TTS.api import TTS
import uuid
import os
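
# Random identifier generated at startup, used to namespace the temporary files
# this app writes to disk.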
uid = uuid.uuid4()
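# Run TTS inference on the GPU when one is available.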
device = "cuda" if torch.cuda.is_available() else "cpu"
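
# Synthesize `inp` with Coqui YourTTS, cloning the voice from whichever reference
# audio is available: the trimmed clip takes priority over the raw upload, which
# takes priority over a microphone recording.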
def custom_bark(inp, in_aud=None, trim_aud=None, in_aud_mic=None):
    speaker_wav = None
    if in_aud_mic is not None:
        speaker_wav = in_aud_mic
    if in_aud is not None and trim_aud is None:
        speaker_wav = in_aud
        #speaker_wav=Path(f"{uid}-tmp_aud.mp4")
    if trim_aud is not None:
        speaker_wav = Path(f"{uid}-trim.wav")
    if speaker_wav is None:
        raise gr.Error("Provide a reference audio clip before generating.")
    tts = TTS(model_name="tts_models/multilingual/multi-dataset/your_tts", progress_bar=False).to(device)
    tts.tts_to_file(inp, speaker_wav=speaker_wav, language="en", file_path=f"{uid}-output.wav")
    return f"{uid}-output.wav"
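
# Download a YouTube video (highest-resolution progressive MP4) plus its
# audio-only stream with pytube; returns the video path, the audio path, and
# the shared audio filename used by the other handlers.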
def load_video_yt(vid):
    yt = YouTube(vid)
    vid = yt.streams.filter(progressive=True, file_extension='mp4').order_by('resolution').desc().first().download(filename=f"{uid}-tmp.mp4")
    vid_aud = yt.streams.filter(only_audio=True)[0].download(filename=f"{uid}-tmp_aud.mp4")
    print(f'Video Length: {yt.length}')
    return vid, vid_aud, f"{uid}-tmp_aud.mp4"
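
# Cut the downloaded audio to the [start_t, end_t] window. Times arrive as
# "m:ss" strings and are converted to milliseconds for pydub slicing; the
# result is exported as a WAV clip for voice cloning.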
def trim_clip(clip, start_t, end_t):
    clip = Path(f"{uid}-tmp_aud.mp4")
    song = AudioSegment.from_file(f"{uid}-tmp_aud.mp4", format="mp4")
    #song = AudioSegment.from_file(Path(f"{clip}"), format="mp4")
    start_min = int(start_t.split(":", 1)[0])
    start_sec = int(start_t.split(":", 1)[1])
    end_min = int(end_t.split(":", 1)[0])
    end_sec = int(end_t.split(":", 1)[1])
    start = ((start_min * 60) + start_sec) * 1000
    end = ((end_min * 60) + end_sec) * 1000
    song_clip = song[start:end]
    song_clip.export(f"{uid}-trim.wav", format="wav")
    print("New audio file created and saved")
    return f"{uid}-trim.wav"
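
# Re-export an uploaded clip to the shared {uid}-tmp_aud.mp4 file that trim_clip
# reads from. Only referenced by the commented-out .change handler at the bottom,
# so it is effectively unused at the moment.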
def pre_aud(inp):
    print(inp)
    song = AudioSegment.from_file(Path(f'{inp}'), format="mp4")
    song.export(f"{uid}-tmp_aud.mp4", format="mp4")
    print(f'pre_aud:: {uid}-tmp_aud.mp4')
    return inp
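
# UI layout: text to speak and the generated audio on top, then the audio-source
# controls (upload/microphone, trim window, YouTube loader) below.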
with gr.Blocks() as app:
    with gr.Group():
        with gr.Row():
            in_text = gr.Textbox(lines=6, max_lines=20)
            with gr.Column():
                alt_go_btn = gr.Button()
                out_audio = gr.Audio(interactive=False)
    with gr.Group():
        with gr.Row():
            gr.Markdown('''<h1>Audio Source:</h1>''')
        with gr.Row():
            with gr.Column():
                #in_aud_mic = gr.Audio(source='microphone')
                in_aud_file = gr.Audio(label='Audio Source', sources=['microphone', 'upload'], interactive=True, type='filepath')
                aud_file = gr.File(interactive=False, visible=True)
                with gr.Row():
                    start_time = gr.Textbox(label="Start", value="0:00", placeholder="0:23")
                    end_time = gr.Textbox(label="End", value="0:01", placeholder="1:12")
                trim_clip_btn = gr.Button("Trim Clip")
                trim_aud = gr.Audio(label='Trimmed Audio Source', sources=['upload'], interactive=False)
            with gr.Column():
                in_aud_yt = gr.Textbox(label="YouTube URL")
                load_yt_btn = gr.Button("Load URL")
                yt_vid = gr.Video(interactive=False)
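
    # Wire the controls to the handlers above.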
    #in_aud_file.change(pre_aud, in_aud_file, aud_file)
    load_yt_btn.click(load_video_yt, in_aud_yt, [yt_vid, in_aud_file, aud_file])
    trim_clip_btn.click(trim_clip, [aud_file, start_time, end_time], trim_aud)
    alt_go_btn.click(custom_bark, [in_text, in_aud_file, trim_aud], out_audio)

app.launch()