demo-dubbing / app.py
Lagyamfi's picture
include asyncio for tts
cf4f031
raw
history blame
3.92 kB
import os

import gradio as gr
from tqdm.asyncio import tqdm_asyncio

from pipeline import (
    extract_audio_from_video,
    transcribe_and_preprocess_audio,
    translation_main,
    tts_main,
    create_combined_output,
)
from pipeline import translation_hdr, translation_url, LANG
async def process_video_translation(
    input_video, speaker, progress=gr.Progress(track_tqdm=True)
):
    """Dub an uploaded video into Twi and return the path of the new video.

    The work runs in five sequential stages: extract the audio track,
    transcribe it, translate the sentences, synthesize speech for the
    translations, and combine the new audio with the original video.

    Args:
        input_video: Path of the uploaded video as a string, or ``None``
            when nothing was uploaded.
        speaker: Voice selection passed through to ``tts_main``.
        progress: Gradio progress tracker; called before every stage with a
            completion fraction and a description.

    Returns:
        The output path, i.e. the input path with its extension replaced by
        ``_translated.mp4``, or ``None`` when no video was supplied.
    """
    if input_video is None:
        gr.Info("Please upload a video file", duration=2)
        return None

    # splitext strips only the final extension, so dots elsewhere in the
    # path (dotted directories, "./" prefixes, multi-dot file names) survive.
    output_video = f"{os.path.splitext(input_video)[0]}_translated.mp4"

    # Must equal the number of pbar.update(1) calls below, otherwise the bar
    # never reaches 100%.
    total_stages = 5

    with tqdm_asyncio(total=total_stages, desc="Processing video translation") as pbar:
        # stage 1: extract audio from video
        progress(0.1, desc="Extracting audio from video")
        output_audio_path = extract_audio_from_video(input_video)
        pbar.update(1)

        # stage 2: transcribe audio
        progress(0.2, desc="Transcribing audio")
        sentences = transcribe_and_preprocess_audio(output_audio_path)
        pbar.update(1)

        # stage 3: translate to Twi
        progress(0.4, desc="Translating to Twi")
        khaya_translations = await translation_main(
            sentences, translation_url, translation_hdr, LANG
        )
        pbar.update(1)

        # stage 4: convert translations to speech
        progress(0.7, desc="Converting to speech")
        output_audio = await tts_main(khaya_translations, speaker)
        pbar.update(1)

        # stage 5: combine the new audio with the original video
        progress(1.0, desc="Combining audio and video")
        create_combined_output(input_video, output_audio, output_video)
        pbar.update(1)

    print("Video translation completed")
    gr.Info("Video translation completed", duration=2)
    return output_video
# Theme shared by the whole interface.
app_theme = gr.themes.Ocean(text_size="lg", spacing_size="lg")

# Display options common to both logo images in the header row.
_LOGO_OPTIONS = dict(
    show_label=False,
    height=200,
    show_download_button=False,
    show_fullscreen_button=False,
    container=False,
    show_share_button=False,
)

# Markup for the page title shown between the two logos.
_TITLE_HTML = """
<h1 style="font-size: 4em; font-weight: bold; margin-top: 0.5em; margin-left:3em">
Video Dubbing Interface
</h1>
"""

with gr.Blocks(theme=app_theme, title="Video Dubbing Interface") as demo:
    # Header row: logo | title | logo
    with gr.Row(variant="default"):
        with gr.Column(scale=1, min_width=0):
            gr.Image("logo_2.jpeg", **_LOGO_OPTIONS)
        with gr.Column(scale=6, variant="default"):
            gr.HTML(_TITLE_HTML)
        with gr.Column(scale=1, min_width=0):
            gr.Image("NLPGhana_logo_2.png", **_LOGO_OPTIONS)

    # Horizontal rule followed by a fixed-height spacer.
    gr.HTML("<hr style='margin-top: 0.5em;'>")
    gr.HTML("<div style='height: 20px;'></div>")

    # Main interface: upload, speaker choice and trigger on one side,
    # the processed video on the other.
    with gr.Row():
        with gr.Column():
            input_video = gr.Video(label="Input Video", sources=["upload"], height=400)
            input_speaker = gr.Radio(
                choices=["male", "female"],
                value="female",
                label="Select Speaker",
                show_label=True,
                container=True,
                min_width=50,
            )
            submit = gr.Button("Process Video", scale=1)
        output_video = gr.Video(label="Processed Video", height=400)

    # Clicking the button runs the dubbing pipeline on the upload and the
    # chosen speaker, and shows the returned file in the output player.
    submit.click(
        fn=process_video_translation,
        inputs=[input_video, input_speaker],
        outputs=output_video,
    )

    gr.HTML("<div style='height: 10px;'></div>")

# Launch the interface
demo.launch(debug=True)