"""Streamlit app that transcribes an uploaded video and speaks a translation.

Pipeline: save the upload to a temp file -> extract the audio track with
moviepy -> transcribe it with Whisper in fixed-length chunks -> translate the
transcript with googletrans -> synthesize the translation with gTTS.
"""

import contextlib
import os
import tempfile

import numpy as np
import streamlit as st
import whisper
from googletrans import Translator
from gtts import gTTS
from moviepy.editor import VideoFileClip

# Initialize Whisper model
# NOTE(review): Streamlit re-executes this script on every interaction, so the
# model is reloaded on each rerun; consider caching it if the installed
# Streamlit version provides a resource cache.
whisper_model = whisper.load_model("base")

# Language options: display name -> language code passed to both the
# translator (``dest=``) and gTTS (``lang=``).
LANGUAGES = {
    'English': 'en',
    'Tamil': 'ta',
    'Sinhala': 'si',
    'French': 'fr',
    # Add more languages as needed
}


def split_audio(audio, segment_length=30, sample_rate=16000):
    """Split audio into segments of specified length in seconds.

    Args:
        audio: Array of samples with time on the LAST axis. This accepts the
            1-D waveform returned by ``whisper.load_audio`` as well as a
            ``(channels, samples)`` array.
        segment_length: Length of each segment in seconds; must be positive.
        sample_rate: Samples per second of ``audio``. The default of 16000
            matches the rate ``whisper.load_audio`` resamples to (per the
            openai-whisper documentation).

    Returns:
        A list of array slices covering ``audio`` in order. The final segment
        may be shorter than ``segment_length``. Empty input yields ``[]``.

    Raises:
        ValueError: If ``segment_length`` or ``sample_rate`` is not positive.
    """
    if segment_length <= 0 or sample_rate <= 0:
        raise ValueError("segment_length and sample_rate must be positive")

    # Convert the duration to a whole number of samples; never step by zero.
    samples_per_segment = max(1, int(segment_length * sample_rate))
    total_samples = audio.shape[-1]

    # Slicing past the end is clamped by the array, so no explicit min() is
    # needed for the last (possibly shorter) segment.
    return [
        audio[..., start:start + samples_per_segment]
        for start in range(0, total_samples, samples_per_segment)
    ]


def _remove_quietly(path):
    """Delete ``path`` if it is set, ignoring files that are already gone."""
    if path:
        with contextlib.suppress(OSError):
            os.remove(path)


def _new_temp_path(suffix):
    """Create an empty temp file securely and return its path.

    ``tempfile.mkstemp`` replaces the deprecated, race-prone ``mktemp``.
    """
    fd, path = tempfile.mkstemp(suffix=suffix)
    os.close(fd)
    return path


def _extract_audio(video_path):
    """Write the audio track of ``video_path`` to a temp WAV; return its path."""
    audio_path = _new_temp_path(".wav")
    try:
        video = VideoFileClip(video_path)
        try:
            if video.audio is None:
                raise ValueError("the video has no audio track")
            video.audio.write_audiofile(audio_path)
        finally:
            # Release the ffmpeg reader so the temp video can be deleted.
            video.close()
    except Exception:
        _remove_quietly(audio_path)
        raise
    return audio_path


def _transcribe(audio_path):
    """Transcribe ``audio_path`` with Whisper in 30-second chunks."""
    audio = whisper.load_audio(audio_path)
    pieces = []
    for segment in split_audio(audio, segment_length=30):
        result = whisper_model.transcribe(segment)
        pieces.append(result["text"].strip())
    # Skip empty chunk transcripts so silence does not add stray spaces.
    return " ".join(piece for piece in pieces if piece)


def _translate_video(video_file, target_language):
    """Run the full pipeline for one upload and render the results."""
    lang_code = LANGUAGES[target_language]
    temp_video_path = audio_path = audio_output_path = None
    try:
        # Save video to a temporary file, keeping the uploaded extension.
        suffix = os.path.splitext(video_file.name or "")[1] or '.mp4'
        with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as temp_video:
            # getvalue() returns the whole upload regardless of the stream
            # position left behind by a previous rerun.
            temp_video.write(video_file.getvalue())
            temp_video_path = temp_video.name

        # Extract audio from video
        try:
            audio_path = _extract_audio(temp_video_path)
        except Exception as e:
            st.error(f"Error extracting audio from video: {e}")
            return

        try:
            # Transcribe audio using Whisper in chunks
            original_text = _transcribe(audio_path)
            st.write("Original Transcription:", original_text)

            if not original_text:
                # Nothing to translate or speak; avoid sending empty text on.
                st.warning("No speech was detected in the video.")
                return

            # Translate text to the target language
            translator = Translator()
            translated_text = translator.translate(original_text, dest=lang_code).text
            st.write(f"Translated Text ({target_language}):", translated_text)

            # Convert translated text to speech
            tts = gTTS(text=translated_text, lang=lang_code)
            audio_output_path = _new_temp_path(".mp3")
            tts.save(audio_output_path)

            # Pass bytes (not the path) so playback does not depend on the
            # temp file, which is deleted below.
            with open(audio_output_path, "rb") as audio_file:
                audio_bytes = audio_file.read()

            # Display translated text and audio
            st.success("Translation successful!")
            st.audio(audio_bytes, format="audio/mp3")
        except Exception as e:
            st.error(f"Error during transcription/translation: {e}")
    finally:
        # Clean up temporary files, including after a partial failure.
        for path in (temp_video_path, audio_path, audio_output_path):
            _remove_quietly(path)


st.title("AI Video Translator with Whisper and GTTS")

# Step 1: Upload video file
video_file = st.file_uploader("Upload a video file", type=["mp4", "mov", "avi", "mkv"])

if video_file:
    # Step 2: Select translation language
    target_language = st.selectbox(
        "Select the target language for translation", list(LANGUAGES.keys())
    )

    # Process when user clicks translate
    if st.button("Translate Video"):
        _translate_video(video_file, target_language)