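"""Gradio front end for the video dubbing pipeline.

Takes an uploaded video, extracts and transcribes its audio, translates the
transcript to Twi, synthesizes speech with the selected voice, and combines
the dubbed audio with the original video.
"""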
import os

import gradio as gr
from tqdm.asyncio import tqdm_asyncio

from pipeline import (
    extract_audio_from_video,
    transcribe_and_preprocess_audio,
    translation_main,
    tts_main,
    create_combined_output,
)
from pipeline import translation_hdr, translation_url, LANG


async def process_video_translation(
    input_video, speaker, progress=gr.Progress(track_tqdm=True)
):
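    """Run the full dubbing pipeline on an uploaded video and return the path
    to the dubbed output video (or None if no video was uploaded)."""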
    if input_video is None:
        gr.Info("Please upload a video file", duration=2)
        return

    total_stages = 5  # extract audio, transcribe, translate, synthesize speech, combine

    output_video = f"{os.path.splitext(input_video)[0]}_translated.mp4"
    with tqdm_asyncio(total=total_stages, desc="Processing video translation") as pbar:

        # stage 1: extract audio from video
        progress(0.1, desc="Extracting audio from video")
        output_audio_path = extract_audio_from_video(input_video)
        pbar.update(1)

        # stage 2: transcribe the extracted audio and split it into sentences
        progress(0.2, desc="Transcribing audio")
        sentences = transcribe_and_preprocess_audio(output_audio_path)
        pbar.update(1)

        # stage 3: translate the transcribed sentences to Twi
        progress(0.4, desc="Translating to Twi")
        khaya_translations = await translation_main(
            sentences, translation_url, translation_hdr, LANG
        )
        pbar.update(1)

        # stage 4: convert the translated text to speech with the selected voice
        progress(0.7, desc="Converting to speech")
        output_audio = await tts_main(khaya_translations, speaker)
        pbar.update(1)

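        # stage 5: combine the generated audio with the original video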
        progress(1.0, desc="Combining audio and video")
        create_combined_output(input_video, output_audio, output_video)
        pbar.update(1)

        print("Video translation completed")
        gr.Info(f"Video translation completed", duration=2)

        return output_video


app_theme = gr.themes.Ocean(
    text_size="lg",
    spacing_size="lg",
)
with gr.Blocks(
    theme=app_theme,
    title="Video Dubbing Interface",
) as demo:
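    # header row: logos on either side of the page title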
    with gr.Row(variant="default"):
        with gr.Column(
            scale=1,
            min_width=0,
        ):
            gr.Image(
                "logo_2.jpeg",
                show_label=False,
                height=200,
                show_download_button=False,
                show_fullscreen_button=False,
                container=False,
                show_share_button=False,
            )
        with gr.Column(
            scale=6,
            variant="default",
        ):
            gr.HTML(
                """
                <div style="display: flex; align-items: center; justify-content: center;">
        <h1 style="font-size: 2em; font-weight: bold; margin-top: 1em;">
            Video Dubbing Interface
        </h1>
    </div>

                """,
            )
        with gr.Column(
            scale=1,
            min_width=0,
        ):
            gr.Image(
                "NLPGhana_logo_1.png",
                show_label=False,
                height=200,
                show_download_button=False,
                show_fullscreen_button=False,
                container=False,
                show_share_button=False,
            )
    gr.HTML("<hr style='margin-top: 0.5em;'>")

    gr.HTML("<div style='height: 20px;'></div>")

    # main interface components
    with gr.Row():
        with gr.Column():
            input_video = gr.Video(label="Input Video", sources=["upload"])
            input_speaker = gr.Radio(
                label="Select Speaker",
                choices=["male", "female"],
                value="female",
                min_width=50,
                container=True,
                show_label=True,
            )
            submit = gr.Button("Process Video", scale=1)
        output_video = gr.Video(label="Processed Video")
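        # run the dubbing pipeline when the button is clicked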
        submit.click(
            process_video_translation,
            inputs=[input_video, input_speaker],
            outputs=output_video,
        )

    gr.HTML("<div style='height: 10px;'></div>")


# Launch the interface
demo.launch(debug=True)