File size: 5,681 Bytes
f89c8ce
12e5a2b
f89c8ce
04676e1
f89c8ce
 
 
8eab835
f89c8ce
990b7d7
04676e1
157a5b0
4ff1681
 
f89c8ce
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
04676e1
 
 
4ff1681
04676e1
 
 
 
157a5b0
04676e1
 
8eab835
04676e1
8eab835
157a5b0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
04676e1
 
 
 
f89c8ce
 
04676e1
157a5b0
f89c8ce
 
12e5a2b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
19707c5
12e5a2b
19707c5
 
 
 
12e5a2b
19707c5
 
 
 
 
 
 
 
 
f89c8ce
 
 
 
 
12e5a2b
 
f89c8ce
 
12e5a2b
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
import streamlit as st
from moviepy.editor import VideoFileClip, AudioFileClip, concatenate_audioclips
import whisper
from translate import Translator
from gtts import gTTS
import tempfile
import os
import numpy as np

# Initialize Whisper model.
# NOTE(review): this runs on every Streamlit rerun; consider caching the
# loaded model if the installed Streamlit version offers a resource cache.
try:
    whisper_model = whisper.load_model("base")
except Exception as e:
    st.error(f"Error loading Whisper model: {e}")
    # Without the model `whisper_model` is never bound, so continuing would
    # only surface later as a NameError during transcription. Halt here.
    st.stop()

# Language options: display name shown in the selector -> language code
# handed to the translator and to gTTS.
LANGUAGES = dict(
    English='en',
    Tamil='ta',
    Sinhala='si',
    French='fr',  # Add more languages as needed
)

st.title("AI Video Translator with Whisper and GTTS")

# Step 1: Upload video file
video_file = st.file_uploader("Upload a video file", type=["mp4", "mov", "avi", "mkv"])

if video_file:
    # Step 2: Select translation language
    target_language = st.selectbox("Select the target language for translation", list(LANGUAGES.keys()))

    # Process when user clicks translate
    if st.button("Translate Video"):
        # Save the upload to a temporary file, keeping its own extension
        # (the uploader accepts mov/avi/mkv too); fall back to .mp4.
        upload_suffix = os.path.splitext(video_file.name)[1] or '.mp4'
        with tempfile.NamedTemporaryFile(delete=False, suffix=upload_suffix) as temp_video:
            # getvalue() returns the whole upload regardless of the current
            # read position of the underlying buffer.
            temp_video.write(video_file.getvalue())
            temp_video_path = temp_video.name

        # Reserve the path for the extracted audio. The file is created (and
        # closed) up front instead of using the deprecated, race-prone
        # tempfile.mktemp; the audio writer then overwrites it.
        with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as temp_audio:
            audio_path = temp_audio.name

        # Extract audio from video
        video = None
        try:
            video = VideoFileClip(temp_video_path)
            if video.audio is None:
                # Give a readable message instead of an AttributeError on None.
                raise ValueError("the uploaded video has no audio track")
            video.audio.write_audiofile(audio_path)
        except Exception as e:
            st.error(f"Error extracting audio from video: {e}")
            # Release the clip before deleting the files it may still hold open.
            if video is not None:
                video.close()
            for leftover_path in (temp_video_path, audio_path):
                try:
                    os.remove(leftover_path)
                except OSError:
                    # Best-effort cleanup; the error above is what matters.
                    pass
            st.stop()

        # Function to transcribe audio in chunks
        def transcribe_audio_in_chunks(audio_path, model, chunk_length=30):
            """Transcribe an audio file window by window and return the joined text.

            The audio is loaded with ``whisper.load_audio`` and cut into
            consecutive windows of ``chunk_length`` seconds (the final window
            may be shorter). Each window is passed to ``model.transcribe`` and
            the ``'text'`` entries of the results are joined with single spaces.

            Args:
                audio_path: Path of the audio file to load.
                model: Object exposing ``transcribe(samples)`` that returns a
                    mapping with a ``'text'`` entry.
                chunk_length: Window length in seconds.

            Returns:
                The window transcriptions joined by single spaces.
            """
            rate = whisper.audio.SAMPLE_RATE
            samples = whisper.load_audio(audio_path)
            total_seconds = len(samples) / rate  # Duration in seconds

            def window(begin):
                # Clamp the window end so the last slice stops at the audio end.
                finish = min(begin + chunk_length, total_seconds)
                return samples[int(begin * rate):int(finish * rate)]

            texts = [
                model.transcribe(window(begin))['text']
                for begin in np.arange(0, total_seconds, chunk_length)
            ]
            return ' '.join(texts)

        # Function to translate text in chunks
        def translate_in_chunks(text, translator, max_length=500):
            """Translate ``text`` piece by piece and return the joined result.

            The text is split on whitespace and the words are packed greedily
            into chunks of at most ``max_length`` characters (words joined by
            single spaces). Each chunk is passed to ``translator.translate``
            in order, and the translated chunks are joined with single spaces.

            A single word longer than ``max_length`` cannot be split here and
            is sent as its own over-long chunk. Empty chunks are never sent to
            the translator.

            Args:
                text: Source text to translate.
                translator: Object exposing ``translate(str) -> str``.
                max_length: Maximum number of characters per chunk.

            Returns:
                The translated chunks joined by single spaces; an empty string
                when ``text`` contains no words.
            """
            chunks = []
            current_chunk = ""

            for word in text.split():
                candidate = f"{current_chunk} {word}" if current_chunk else word
                if len(candidate) <= max_length:
                    current_chunk = candidate
                else:
                    # Flush only a non-empty chunk: when the very first word is
                    # already too long there is nothing to flush yet.
                    if current_chunk:
                        chunks.append(current_chunk)
                    current_chunk = word

            if current_chunk:
                chunks.append(current_chunk)

            return ' '.join(translator.translate(chunk) for chunk in chunks)

        # Transcribe, translate, synthesize speech and mux it onto the video.
        # Every temporary file and clip created along the way is tracked so the
        # cleanup in `finally` works no matter which step failed.
        def _new_temp_path(suffix):
            """Create an empty temporary file and return its path."""
            with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as handle:
                return handle.name

        def _pack_words(text, limit):
            """Greedily pack whitespace-separated words into pieces of at most ``limit`` characters."""
            pieces = []
            piece = ""
            for word in text.split():
                # `piece and ...` keeps an over-long first word from flushing an
                # empty piece, which the speech synthesizer would reject.
                if piece and len(piece) + len(word) + 1 > limit:
                    pieces.append(piece)
                    piece = word
                else:
                    piece += " " + word if piece else word
            if piece:
                pieces.append(piece)
            return pieces

        temp_paths = []            # Files created in this stage
        clips_to_close = [video]   # Clips that must be released before deletion
        try:
            # Transcribe audio using Whisper
            original_text = transcribe_audio_in_chunks(audio_path, whisper_model)
            st.write("Original Transcription:", original_text)

            # Translate text to the target language
            translator = Translator(to_lang=LANGUAGES[target_language])
            translated_text = translate_in_chunks(original_text, translator)
            st.write(f"Translated Text ({target_language}):", translated_text)

            # Convert translated text to speech in chunks
            max_length = 200  # Adjust as needed
            speech_chunks = _pack_words(translated_text, max_length)
            if not speech_chunks:
                # Nothing to synthesize; concatenating zero clips would fail
                # with a far less helpful message.
                raise ValueError("no speech was detected in the video")

            tts_clips = []
            for speech_chunk in speech_chunks:
                tts = gTTS(text=speech_chunk, lang=LANGUAGES[target_language])
                tts_audio_path = _new_temp_path(".mp3")
                temp_paths.append(tts_audio_path)
                tts.save(tts_audio_path)
                tts_clip = AudioFileClip(tts_audio_path)
                clips_to_close.append(tts_clip)
                tts_clips.append(tts_clip)

            # Concatenate all TTS audio chunks
            final_audio = concatenate_audioclips(tts_clips)
            translated_audio_path = _new_temp_path(".mp3")
            temp_paths.append(translated_audio_path)
            final_audio.write_audiofile(translated_audio_path)

            # Merge translated audio with the original video
            final_video_path = _new_temp_path(".mp4")
            temp_paths.append(final_video_path)
            original_video = VideoFileClip(temp_video_path)
            clips_to_close.append(original_video)
            translated_audio = AudioFileClip(translated_audio_path)
            clips_to_close.append(translated_audio)
            final_video = original_video.set_audio(translated_audio)
            final_video.write_videofile(final_video_path, codec='libx264', audio_codec='aac')

            # Read the result into memory so the player and the download
            # button do not depend on the temporary file, which is deleted below.
            with open(final_video_path, "rb") as f:
                final_video_bytes = f.read()

            # Display success message and provide download link
            st.success("Translation successful! Download your translated video below:")
            st.video(final_video_bytes)
            st.download_button("Download Translated Video", final_video_bytes, file_name="translated_video.mp4")

        except Exception as e:
            st.error(f"Error during transcription/translation: {e}")

        finally:
            # Clean up: close clips first so their files are no longer in use,
            # then delete every temporary file. Both steps are best-effort so a
            # cleanup hiccup never hides the real result or error.
            for clip in clips_to_close:
                try:
                    clip.close()
                except Exception:
                    pass
            for path in temp_paths + [temp_video_path, audio_path]:
                try:
                    os.remove(path)
                except OSError:
                    pass