File size: 4,566 Bytes
f89c8ce
12e5a2b
f89c8ce
04676e1
f89c8ce
 
 
8eab835
f89c8ce
990b7d7
04676e1
157a5b0
4ff1681
 
f89c8ce
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
04676e1
 
 
4ff1681
04676e1
 
 
 
157a5b0
04676e1
 
8eab835
04676e1
8eab835
96d6a67
 
 
 
 
 
 
 
157a5b0
04676e1
 
 
 
f89c8ce
 
04676e1
96d6a67
f89c8ce
 
96d6a67
 
 
 
19707c5
 
 
 
96d6a67
 
19707c5
 
 
 
 
 
 
 
 
f89c8ce
 
 
 
 
 
 
96d6a67
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
import streamlit as st
from moviepy.editor import VideoFileClip, AudioFileClip, concatenate_audioclips
import whisper
from translate import Translator
from gtts import gTTS
import tempfile
import os
import numpy as np

# Initialize Whisper model
try:
    whisper_model = whisper.load_model("base")
except Exception as e:
    st.error(f"Error loading Whisper model: {e}")
    # Without the model `whisper_model` is never bound, so letting the script
    # continue would only defer the failure to a NameError at transcription
    # time. Halt this run right after reporting the real cause.
    st.stop()

# Supported targets: the name shown in the language selector mapped to the
# language code handed to the translator and the text-to-speech engine.
LANGUAGES = dict(
    English='en',
    Tamil='ta',
    Sinhala='si',
    French='fr',  # Add more languages as needed
)

st.title("AI Video Translator with Whisper and GTTS")

# Step 1: ask the user for the video to translate (restricted to the listed
# container formats).
video_file = st.file_uploader(
    label="Upload a video file",
    type=["mp4", "mov", "avi", "mkv"],
)

if video_file:
    # Step 2: Select translation language
    target_language = st.selectbox("Select the target language for translation", list(LANGUAGES.keys()))

    # Process when user clicks translate
    if st.button("Translate Video"):

        def make_temp_path(suffix):
            """Create an empty temporary file and return its path.

            Uses ``tempfile.mkstemp`` so the file is actually created when the
            name is chosen, instead of the deprecated ``tempfile.mktemp`` which
            only returns a name that another process could claim first. The
            descriptor is closed immediately so other writers can reopen the
            path.
            """
            handle, path = tempfile.mkstemp(suffix=suffix)
            os.close(handle)
            return path

        def transcribe_audio_in_chunks(audio_path, model, chunk_length=30):
            """Transcribe an audio file piece by piece and join the results.

            Args:
                audio_path: Path of the audio file, loaded with
                    ``whisper.load_audio``.
                model: Object whose ``transcribe(segment)`` returns a mapping
                    with a ``'text'`` entry.
                chunk_length: Length of each piece in seconds.

            Returns:
                The text of every piece joined with single spaces; an empty
                string when the audio contains no samples.
            """
            samples = whisper.load_audio(audio_path)
            # Slice on integer sample offsets so chunk boundaries never depend
            # on float rounding of second-based positions.
            samples_per_chunk = int(chunk_length * whisper.audio.SAMPLE_RATE)
            pieces = [
                model.transcribe(samples[offset:offset + samples_per_chunk])['text']
                for offset in range(0, len(samples), samples_per_chunk)
            ]
            return ' '.join(pieces)

        def translate_text(original_text, translator):
            """Translate ``original_text`` and echo the result for debugging.

            Warns in the UI when the translation equals the input (ignoring
            surrounding whitespace), then returns the translated text.
            """
            translated_text = translator.translate(original_text)
            # Debugging: Check translation results
            st.write(f"Translated Text Debug: {translated_text}")
            if translated_text.strip() == original_text.strip():
                st.warning("The translated text is the same as the original. Check if the target language is appropriate.")
            return translated_text

        # Everything registered here is released in the `finally` below, no
        # matter where the pipeline stops.
        temp_paths = []
        open_clips = []
        try:
            # Save the upload to disk, keeping its own extension (falling back
            # to .mp4 when the uploaded name has none).
            upload_suffix = os.path.splitext(video_file.name)[1] or '.mp4'
            temp_video_path = make_temp_path(upload_suffix)
            temp_paths.append(temp_video_path)
            with open(temp_video_path, 'wb') as temp_video:
                # getvalue() returns the whole buffer regardless of the current
                # read position (assumes the upload is BytesIO-like, as
                # Streamlit documents -- TODO confirm for the pinned version).
                temp_video.write(video_file.getvalue())

            # Extract audio from video
            try:
                video = VideoFileClip(temp_video_path)
                open_clips.append(video)
                if video.audio is None:
                    raise ValueError("the uploaded video has no audio track")
                audio_path = make_temp_path(".wav")
                temp_paths.append(audio_path)
                video.audio.write_audiofile(audio_path)
            except Exception as e:
                st.error(f"Error extracting audio from video: {e}")
                # Ends this script run; the `finally` below still cleans up.
                st.stop()

            try:
                # Transcribe audio using Whisper
                original_text = transcribe_audio_in_chunks(audio_path, whisper_model)
                st.write("Original Transcription:", original_text)

                # Translate text to the target language
                language_code = LANGUAGES[target_language]
                translator = Translator(to_lang=language_code)
                translated_text = translate_text(original_text, translator)
                st.write(f"Translated Text ({target_language}):", translated_text)

                # Convert translated text to speech
                tts_audio_path = make_temp_path(".mp3")
                temp_paths.append(tts_audio_path)
                gTTS(text=translated_text, lang=language_code).save(tts_audio_path)

                # Merge translated audio with the original video, reusing the
                # clip that is already open instead of opening the file again.
                final_video_path = make_temp_path(".mp4")
                temp_paths.append(final_video_path)
                final_audio = AudioFileClip(tts_audio_path)
                open_clips.append(final_audio)
                final_video = video.set_audio(final_audio)
                final_video.write_videofile(final_video_path, codec='libx264', audio_codec='aac')

                # Display success message and the result
                st.success("Translation successful! Download your translated video below:")
                # NOTE(review): the output file is deleted right after this
                # block, exactly as before; this assumes st.video and
                # st.download_button copy the data when called -- confirm.
                st.video(final_video_path)

                # Provide download link
                with open(final_video_path, "rb") as f:
                    st.download_button("Download Translated Video", f, file_name="translated_video.mp4")

            except Exception as e:
                st.error(f"Error during transcription/translation: {e}")
        finally:
            # Close clips before deleting: an open reader can keep the file
            # locked on some platforms.
            try:
                for clip in open_clips:
                    clip.close()
            finally:
                # Clean up temporary files. Only paths that were really
                # created are listed, so an early failure can no longer raise
                # NameError / FileNotFoundError here.
                for path in temp_paths:
                    try:
                        os.remove(path)
                    except FileNotFoundError:
                        # Already gone -- nothing left to clean up.
                        continue
                    except OSError as cleanup_error:
                        st.warning(f"Could not remove temporary file {path}: {cleanup_error}")