import streamlit as st
from moviepy.editor import VideoFileClip
import whisper
from googletrans import Translator
from gtts import gTTS
import tempfile
import os
import numpy as np

# Initialize Whisper model
whisper_model = whisper.load_model("base")


# Split a 1-D Whisper audio array into fixed-length chunks.
# Defined before the Streamlit flow below so it already exists when the button handler runs.
def split_audio(audio, segment_length=30):
    """Split audio into segments of the specified length in seconds."""
    samples_per_segment = segment_length * whisper.audio.SAMPLE_RATE  # Whisper audio is 16 kHz mono
    segments = []
    for start in range(0, len(audio), samples_per_segment):
        segments.append(audio[start:start + samples_per_segment])
    return segments

# Language options
LANGUAGES = {
    'English': 'en',
    'Tamil': 'ta',
    'Sinhala': 'si',
    'French': 'fr',  # Add more languages as needed
}

st.title("AI Video Translator with Whisper and GTTS")

# Step 1: Upload video file
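# st.file_uploader returns an in-memory UploadedFile (file-like object), or None until a file is chosen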
video_file = st.file_uploader("Upload a video file", type=["mp4", "mov", "avi", "mkv"])

if video_file:
    # Step 2: Select translation language
    target_language = st.selectbox("Select the target language for translation", list(LANGUAGES.keys()))

    # Process when user clicks translate
    if st.button("Translate Video"):
        # Save video to a temporary file
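        # delete=False keeps the file on disk so MoviePy can reopen it by path below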
        with tempfile.NamedTemporaryFile(delete=False, suffix='.mp4') as temp_video:
            temp_video.write(video_file.read())
            temp_video_path = temp_video.name

        # Extract audio from video
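        # MoviePy writes the audio track via ffmpeg, which must be installed on the system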
        try:
            video = VideoFileClip(temp_video_path)
            audio_path = tempfile.mktemp(suffix=".wav")
            video.audio.write_audiofile(audio_path)
        except Exception as e:
            st.error(f"Error extracting audio from video: {e}")
            os.remove(temp_video_path)
            st.stop()

        # Transcribe audio using Whisper in chunks
        audio_output_path = None  # set only after TTS succeeds; checked during cleanup
        try:
            # Load the audio file with Whisper
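            # load_audio returns a mono float32 NumPy array resampled to 16 kHz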
            audio = whisper.load_audio(audio_path)
            audio_segments = split_audio(audio, segment_length=30)  # Split into 30-second segments

            original_text = ""
            for segment in audio_segments:
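                # transcribe() accepts a raw NumPy audio array as well as a file path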
                result = whisper_model.transcribe(segment)
                original_text += result["text"] + " "  # Concatenate transcriptions

            st.write("Original Transcription:", original_text.strip())

            # Translate text to the target language
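            # googletrans uses the unofficial Google Translate web API (no API key required)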
            translator = Translator()
            translated_text = translator.translate(original_text.strip(), dest=LANGUAGES[target_language]).text
            st.write(f"Translated Text ({target_language}):", translated_text)

            # Convert translated text to speech
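            # gTTS synthesizes speech through Google Translate's TTS endpoint and saves MP3 output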
            tts = gTTS(text=translated_text, lang=LANGUAGES[target_language])
            audio_output_path = tempfile.mktemp(suffix=".mp3")
            tts.save(audio_output_path)

            # Display translated text and audio
            st.success("Translation successful!")
            st.audio(audio_output_path, format="audio/mp3")
        except Exception as e:
            st.error(f"Error during transcription/translation: {e}")

        # Clean up temporary files (the TTS output only exists if the pipeline succeeded)
        for path in (temp_video_path, audio_path, audio_output_path):
            if path and os.path.exists(path):
                os.remove(path)
