import os

import gradio as gr
from tqdm.asyncio import tqdm_asyncio

from pipeline import (
    extract_audio_from_video,
    transcribe_and_preprocess_audio,
    translation_main,
    tts_main,
    create_combined_output,
)
from pipeline import translation_hdr, translation_url, LANG
async def process_video_translation(
    input_video, speaker, progress=gr.Progress(track_tqdm=True)
):
    """Run the full video-dubbing pipeline on an uploaded video.

    Stages: extract audio -> transcribe -> translate to Twi ->
    synthesize speech -> mux new audio back onto the video.

    Args:
        input_video: Filesystem path of the uploaded video, or None when
            the user clicked "Process Video" without uploading anything.
        speaker: TTS voice selection ("male" or "female").
        progress: Gradio progress tracker; track_tqdm mirrors the tqdm
            bar into the UI.

    Returns:
        Path of the dubbed output video, or None if no input was given.
    """
    if input_video is None:
        gr.Info("Please upload a video file", duration=2)
        return None

    # One tick per pipeline stage below (5 updates, so total must be 5 —
    # the previous value of 6 left the bar permanently incomplete).
    total_stages = 5
    # os.path.splitext strips only the final extension; a naive
    # split('.')[0] would truncate at the FIRST dot anywhere in the path.
    output_video = f"{os.path.splitext(input_video)[0]}_translated.mp4"

    with tqdm_asyncio(total=total_stages, desc="Processing video translation") as pbar:
        # stage 1: extract the audio track from the video
        progress(0.1, desc="Extracting audio from video")
        output_audio_path = extract_audio_from_video(input_video)
        pbar.update(1)

        # stage 2: transcribe the audio into sentence-level segments
        progress(0.2, desc="Transcribing audio")
        sentences = transcribe_and_preprocess_audio(output_audio_path)
        pbar.update(1)

        # stage 3: translate the transcript to Twi via the Khaya API
        progress(0.4, desc="Translating to Twi")
        khaya_translations = await translation_main(
            sentences, translation_url, translation_hdr, LANG
        )
        pbar.update(1)

        # stage 4: synthesize speech for the translated sentences
        progress(0.7, desc="Converting to speech")
        output_audio = await tts_main(khaya_translations, speaker)
        pbar.update(1)

        # stage 5: mux the synthesized audio back onto the original video
        progress(1.0, desc="Combining audio and video")
        create_combined_output(input_video, output_audio, output_video)
        pbar.update(1)

    print("Video translation completed")
    gr.Info("Video translation completed", duration=2)
    return output_video
# Shared theme for the whole interface: larger text and spacing on the
# stock Ocean palette.
app_theme = gr.themes.Ocean(spacing_size="lg", text_size="lg")
# ---------------------------------------------------------------------------
# UI layout
# ---------------------------------------------------------------------------
with gr.Blocks(
    title="Video Dubbing Interface",
    theme=app_theme,
) as demo:
    # Header row: project logo | page title | NLP Ghana logo
    with gr.Row(variant="default"):
        with gr.Column(min_width=0, scale=1):
            gr.Image(
                "logo_2.jpeg",
                height=200,
                container=False,
                show_label=False,
                show_download_button=False,
                show_fullscreen_button=False,
                show_share_button=False,
            )
        with gr.Column(variant="default", scale=6):
            gr.HTML(
                """
                <div style="display: flex; align-items: center; justify-content: center;">
                    <h1 style="font-size: 2em; font-weight: bold; margin-top: 1em;">
                        Video Dubbing Interface
                    </h1>
                </div>
                """,
            )
        with gr.Column(min_width=0, scale=1):
            gr.Image(
                "NLPGhana_logo_1.png",
                height=200,
                container=False,
                show_label=False,
                show_download_button=False,
                show_fullscreen_button=False,
                show_share_button=False,
            )

    # Divider plus a vertical spacer between the header and the controls.
    gr.HTML("<hr style='margin-top: 0.5em;'>")
    gr.HTML("<div style='height: 20px;'></div>")

    # Main controls: upload + speaker choice on the left, result on the right.
    with gr.Row():
        with gr.Column():
            source_video = gr.Video(label="Input Video", sources=["upload"])
            speaker_choice = gr.Radio(
                label="Select Speaker",
                choices=["male", "female"],
                value="female",
                min_width=50,
                container=True,
                show_label=True,
            )
            process_btn = gr.Button("Process Video", scale=1)
        dubbed_video = gr.Video(label="Processed Video")

    # Wire the button to the async dubbing pipeline.
    process_btn.click(
        process_video_translation,
        inputs=[source_video, speaker_choice],
        outputs=dubbed_video,
    )
    gr.HTML("<div style='height: 10px;'></div>")

# Launch the interface (debug=True surfaces tracebacks in the console).
demo.launch(debug=True)