File size: 3,278 Bytes
f89c8ce 8eab835 f89c8ce 8eab835 f89c8ce 990b7d7 8eab835 f89c8ce 8eab835 f89c8ce 8eab835 f89c8ce 8eab835 f89c8ce 8eab835 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 |
import streamlit as st
from moviepy.editor import VideoFileClip
import whisper
from googletrans import Translator
from gtts import gTTS
import tempfile
import os
import numpy as np
# Initialize Whisper model.
# NOTE(review): Streamlit re-executes this script on every interaction, so the
# model is loaded again each time; consider caching it if the installed
# Streamlit version supports resource caching.
whisper_model = whisper.load_model("base")

# Display name -> language code handed to both googletrans and gTTS.
LANGUAGES = {
    'English': 'en',
    'Tamil': 'ta',
    'Sinhala': 'si',
    'French': 'fr',  # Add more languages as needed
}

st.title("AI Video Translator with Whisper and GTTS")

# Step 1: Upload video file
video_file = st.file_uploader("Upload a video file", type=["mp4", "mov", "avi", "mkv"])

if video_file:
    # Step 2: Select translation language
    target_language = st.selectbox("Select the target language for translation", list(LANGUAGES.keys()))

    # Process when user clicks translate
    if st.button("Translate Video"):
        # Every temp file created below is registered here so the outer
        # ``finally`` can remove it no matter where processing stops.
        temp_paths = []
        try:
            # Save the upload to disk, keeping its real extension
            # (falls back to .mp4 when the name has none).
            suffix = os.path.splitext(video_file.name)[1] or ".mp4"
            with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as temp_video:
                temp_video_path = temp_video.name
                temp_paths.append(temp_video_path)
                # getvalue() returns the full upload regardless of the
                # stream position left behind by an earlier read.
                temp_video.write(video_file.getvalue())

            # Reserve the output paths with NamedTemporaryFile instead of the
            # deprecated, race-prone tempfile.mktemp().
            with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as temp_audio:
                audio_path = temp_audio.name
                temp_paths.append(audio_path)

            # Extract audio from video
            try:
                video = VideoFileClip(temp_video_path)
                try:
                    if video.audio is None:
                        raise ValueError("the video has no audio track")
                    video.audio.write_audiofile(audio_path)
                finally:
                    # Release the reader/ffmpeg handles held by the clip.
                    video.close()
            except Exception as e:
                st.error(f"Error extracting audio from video: {e}")
                st.stop()

            try:
                # Let Whisper transcribe the whole file: transcribe() already
                # walks long audio in 30-second windows internally. Calling a
                # helper that is only defined further down the script would
                # raise NameError here, because the script runs top to bottom.
                result = whisper_model.transcribe(audio_path)
                original_text = result["text"].strip()
                st.write("Original Transcription:", original_text)

                if not original_text:
                    # The translator and gTTS both need non-empty text.
                    st.warning("No speech was detected in the video.")
                else:
                    # Translate text to the target language
                    language_code = LANGUAGES[target_language]
                    translator = Translator()
                    translated_text = translator.translate(original_text, dest=language_code).text
                    st.write(f"Translated Text ({target_language}):", translated_text)

                    # Convert translated text to speech
                    with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as temp_speech:
                        audio_output_path = temp_speech.name
                        temp_paths.append(audio_output_path)
                    tts = gTTS(text=translated_text, lang=language_code)
                    tts.save(audio_output_path)

                    # Read the MP3 into memory so playback does not depend on
                    # the temp file, which is deleted below.
                    with open(audio_output_path, "rb") as speech_file:
                        speech_bytes = speech_file.read()

                    # Display translated text and audio
                    st.success("Translation successful!")
                    st.audio(speech_bytes, format="audio/mp3")
            except Exception as e:
                st.error(f"Error during transcription/translation: {e}")
        finally:
            # Clean up temporary files (best effort: a file that is already
            # gone or still locked must not mask the real outcome).
            for path in temp_paths:
                try:
                    os.remove(path)
                except OSError:
                    pass
def split_audio(audio, segment_length=30, sample_rate=16000):
    """Split an audio array into consecutive fixed-duration segments.

    Args:
        audio: NumPy-style array whose last axis is the sample axis, e.g. a
            1-D mono waveform or a ``(channels, samples)`` array.
        segment_length: Duration of each segment in seconds.
        sample_rate: Samples per second of ``audio``. The default of 16000 is
            the rate Whisper's ``load_audio`` resamples to.

    Returns:
        A list of slices of ``audio`` along its last axis, in order. Every
        segment holds ``segment_length * sample_rate`` samples except possibly
        the last, which holds the remainder. Empty input yields ``[]``.

    Raises:
        ValueError: If ``segment_length * sample_rate`` is not positive.
    """
    # Convert the duration to a sample count; the array is indexed in
    # samples, not seconds.
    samples_per_segment = int(segment_length * sample_rate)
    if samples_per_segment <= 0:
        raise ValueError("segment_length and sample_rate must give a positive number of samples")

    # shape[-1] works for both 1-D waveforms and (channels, samples) arrays.
    total_samples = audio.shape[-1]

    # Slicing clamps the end index, so the final short segment needs no
    # special casing.
    return [
        audio[..., start:start + samples_per_segment]
        for start in range(0, total_samples, samples_per_segment)
    ]
|