|
import streamlit as st |
|
from moviepy.editor import VideoFileClip |
|
import whisper |
|
from googletrans import Translator |
|
from gtts import gTTS |
|
import tempfile |
|
import os |
|
import numpy as np |
|
|
|
|
|
def _cache_resource(func):
    """Wrap *func* with Streamlit's resource cache when this Streamlit has one.

    Falls back to returning *func* unchanged (i.e. the original uncached
    behaviour) on Streamlit versions that lack ``st.cache_resource``.
    """
    decorator = getattr(st, "cache_resource", None)
    return decorator(func) if decorator is not None else func


@_cache_resource
def _load_whisper_model(name="base"):
    """Load the Whisper model called *name*."""
    return whisper.load_model(name)


# Streamlit re-executes this script on every widget interaction; going through
# the cached loader avoids reloading the model weights on each rerun.
whisper_model = _load_whisper_model()
|
|
|
|
|
# Display name shown in the language selector -> language code handed to the
# translator (``dest=``) and to gTTS (``lang=``).
LANGUAGES = dict(
    English="en",
    Tamil="ta",
    Sinhala="si",
    French="fr",
)
|
|
|
def split_audio(audio, segment_length=30, sample_rate=16000):
    """Split audio into consecutive segments of ``segment_length`` seconds.

    The split runs along the last axis, so both 1-D mono waveforms (what
    ``whisper.load_audio`` returns) and 2-D ``(channels, samples)`` arrays
    are accepted.

    Args:
        audio: Array-like of samples with time on the last axis.
        segment_length: Duration of each segment, in seconds.
        sample_rate: Samples per second of ``audio``. The default of 16000
            is the rate ``whisper.load_audio`` resamples to.

    Returns:
        A list of array slices in playback order. The last slice may be
        shorter than the others; empty audio yields an empty list.

    Raises:
        ValueError: If ``segment_length`` or ``sample_rate`` is not positive.
    """
    if segment_length <= 0 or sample_rate <= 0:
        raise ValueError("segment_length and sample_rate must be positive")

    samples = np.atleast_1d(audio)
    # Convert the duration to a sample count; never step by less than one
    # sample, otherwise range() would be handed a zero step.
    step = max(1, int(round(segment_length * sample_rate)))
    total_samples = samples.shape[-1]
    return [samples[..., start:start + step] for start in range(0, total_samples, step)]


def _new_temp_path(suffix):
    """Create an empty temporary file with *suffix* and return its path."""
    # NamedTemporaryFile actually creates the file, unlike the deprecated and
    # race-prone tempfile.mktemp, which only invents a name.
    with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as handle:
        return handle.name


def _remove_files(paths):
    """Delete every path in *paths*, skipping ones that cannot be removed."""
    for path in paths:
        try:
            os.remove(path)
        except OSError:
            # Deliberate best-effort cleanup: a missing or still-locked temp
            # file must not hide the result or the error already shown.
            pass


def main():
    """Render the upload UI and, on request, transcribe, translate and voice the video."""
    st.title("AI Video Translator with Whisper and GTTS")

    video_file = st.file_uploader("Upload a video file", type=["mp4", "mov", "avi", "mkv"])
    if not video_file:
        return

    target_language = st.selectbox(
        "Select the target language for translation", list(LANGUAGES.keys())
    )
    if not st.button("Translate Video"):
        return

    language_code = LANGUAGES[target_language]
    temp_paths = []  # every temp file created below; removed in the finally
    try:
        # Keep the upload's own extension so the container format is not
        # mislabelled; fall back to .mp4 when the name has none.
        suffix = os.path.splitext(video_file.name)[1] or ".mp4"
        with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as temp_video:
            temp_paths.append(temp_video.name)
            # getvalue() returns the whole upload regardless of read position.
            temp_video.write(video_file.getvalue())
        temp_video_path = temp_video.name

        audio_path = _new_temp_path(".wav")
        temp_paths.append(audio_path)
        try:
            video = VideoFileClip(temp_video_path)
            try:
                if video.audio is None:
                    raise ValueError("the video has no audio track")
                video.audio.write_audiofile(audio_path)
            finally:
                # Release the reader so the temp video can be deleted.
                video.close()
        except Exception as e:
            st.error(f"Error extracting audio from video: {e}")
            return

        try:
            audio = whisper.load_audio(audio_path)

            pieces = []
            for segment in split_audio(audio, segment_length=30):
                text = whisper_model.transcribe(segment)["text"].strip()
                if text:
                    pieces.append(text)
            original_text = " ".join(pieces)
            st.write("Original Transcription:", original_text)

            if not original_text:
                # Nothing to translate or speak; gTTS rejects empty text.
                st.warning("No speech was detected in the video.")
                return

            translated_text = Translator().translate(original_text, dest=language_code).text
            st.write(f"Translated Text ({target_language}):", translated_text)

            audio_output_path = _new_temp_path(".mp3")
            temp_paths.append(audio_output_path)
            gTTS(text=translated_text, lang=language_code).save(audio_output_path)
            # Hand Streamlit the bytes rather than the path so the temp file
            # can be deleted as soon as this run finishes.
            with open(audio_output_path, "rb") as speech_file:
                speech_bytes = speech_file.read()

            st.success("Translation successful!")
            st.audio(speech_bytes, format="audio/mp3")
        except Exception as e:
            st.error(f"Error during transcription/translation: {e}")
    finally:
        _remove_files(temp_paths)


# Streamlit executes the script as ``__main__``, so the app still starts under
# ``streamlit run`` while the helpers above stay importable without side effects.
if __name__ == "__main__":
    main()
|
|