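"""Gradio front end for the video dubbing pipeline.

Takes an uploaded video, extracts and transcribes its audio, translates the
transcript to Twi, synthesizes speech with the selected voice, and combines
the dubbed audio with the original video.
"""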
import os

import gradio as gr
from tqdm.asyncio import tqdm_asyncio

from pipeline import (
    extract_audio_from_video,
    transcribe_and_preprocess_audio,
    translation_main,
    tts_main,
    create_combined_output,
)
from pipeline import translation_hdr, translation_url, LANG


async def process_video_translation(
    input_video, speaker, progress=gr.Progress(track_tqdm=True)
):
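    """Run the full dubbing pipeline on an uploaded video and return the path
    to the dubbed output video (or None if no video was uploaded)."""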
    if input_video is None:
        gr.Info("Please upload a video file", duration=2)
        return

    total_stages = 5  # extract audio, transcribe, translate, synthesize speech, combine

    output_video = f"{os.path.splitext(input_video)[0]}_translated.mp4"
    with tqdm_asyncio(total=total_stages, desc="Processing video translation") as pbar:

        # stage 1: extract audio from video
        progress(0.1, desc="Extracting audio from video")
        output_audio_path = extract_audio_from_video(input_video)
        pbar.update(1)

        # stage 2: transcribe the extracted audio and split it into sentences
        progress(0.2, desc="Transcribing audio")
        sentences = transcribe_and_preprocess_audio(output_audio_path)
        pbar.update(1)

        # stage 3: translate the transcribed sentences to Twi
        progress(0.4, desc="Translating to Twi")
        khaya_translations = await translation_main(
            sentences, translation_url, translation_hdr, LANG
        )
        pbar.update(1)

        # stage 4: convert the translated text to speech with the selected voice
        progress(0.7, desc="Converting to speech")
        output_audio = await tts_main(khaya_translations, speaker)
        pbar.update(1)

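        # stage 5: combine the generated audio with the original video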
        progress(1.0, desc="Combining audio and video")
        create_combined_output(input_video, output_audio, output_video)
        pbar.update(1)

        print("Video translation completed")
        gr.Info(f"Video translation completed", duration=2)

        return output_video


app_theme = gr.themes.Ocean(
    text_size="lg",
    spacing_size="lg",
)
with gr.Blocks(
    theme=app_theme,
    title="Video Dubbing Interface",
) as demo:
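    # header row: logos on either side of the page title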
    with gr.Row(variant="default"):
        with gr.Column(
            scale=1,
            min_width=0,
        ):
            gr.Image(
                "logo_2.jpeg",
                show_label=False,
                height=200,
                show_download_button=False,
                show_fullscreen_button=False,
                container=False,
                show_share_button=False,
            )
        with gr.Column(
            scale=6,
            variant="default",
        ):
            gr.HTML(
                """
                <div style="display: flex; align-items: center; justify-content: center;">
        <h1 style="font-size: 2em; font-weight: bold; margin-top: 1em;">
            Video Dubbing Interface
        </h1>
    </div>

                """,
            )
        with gr.Column(
            scale=1,
            min_width=0,
        ):
            gr.Image(
                "NLPGhana_logo_1.png",
                show_label=False,
                height=200,
                show_download_button=False,
                show_fullscreen_button=False,
                container=False,
                show_share_button=False,
            )
    gr.HTML("<hr style='margin-top: 0.5em;'>")

    gr.HTML("<div style='height: 20px;'></div>")

    # main interface components
    with gr.Row():
        with gr.Column():
            input_video = gr.Video(label="Input Video", sources=["upload"])
            input_speaker = gr.Radio(
                label="Select Speaker",
                choices=["male", "female"],
                value="female",
                min_width=50,
                container=True,
                show_label=True,
            )
            submit = gr.Button("Process Video", scale=1)
        output_video = gr.Video(label="Processed Video")
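        # run the dubbing pipeline when the button is clicked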
        submit.click(
            process_video_translation,
            inputs=[input_video, input_speaker],
            outputs=output_video,
        )

    gr.HTML("<div style='height: 10px;'></div>")


# Launch the interface
demo.launch(debug=True)