|
import streamlit as st |
|
from moviepy.editor import VideoFileClip, AudioFileClip, concatenate_audioclips |
|
import whisper |
|
from translate import Translator |
|
from gtts import gTTS |
|
import tempfile |
|
import os |
|
import numpy as np |
|
|
|
|
|
# Load the Whisper speech-recognition model used for transcription further down.
# NOTE(review): Streamlit re-executes the script on each interaction, so this
# load presumably runs on every rerun - consider a Streamlit resource cache if
# the installed version provides one.
try:
    whisper_model = whisper.load_model("base")
except Exception as e:
    st.error(f"Error loading Whisper model: {e}")
    # Without a model `whisper_model` stays undefined and the transcription
    # step would fail later with a NameError; end this script run here instead.
    st.stop()
|
|
|
|
|
# Display name shown in the language picker -> language code handed to the
# translator and to gTTS. Insertion order is the order shown in the picker.
LANGUAGES = dict(
    English="en",
    Tamil="ta",
    Sinhala="si",
    French="fr",
)
|
|
|
# Page heading.
st.title("AI Video Translator with Whisper and GTTS")

# Upload widget restricted to the listed container formats. The result is
# tested for truthiness before any processing starts.
video_file = st.file_uploader(
    label="Upload a video file",
    type=["mp4", "mov", "avi", "mkv"],
)
|
|
|
# Nothing to do until a video has been uploaded. st.stop() ends this script
# run, which is equivalent to skipping the remainder of the page.
if not video_file:
    st.stop()

target_language = st.selectbox("Select the target language for translation", list(LANGUAGES.keys()))

# The pipeline below only runs on the rerun triggered by the button click.
if not st.button("Translate Video"):
    st.stop()

# Persist the upload to disk because the video library works on file paths.
# Keep the real extension of the upload (falling back to .mp4) instead of
# labelling every container as .mp4.
upload_suffix = os.path.splitext(video_file.name)[1] or ".mp4"
with tempfile.NamedTemporaryFile(delete=False, suffix=upload_suffix) as temp_video:
    temp_video.write(video_file.read())
    temp_video_path = temp_video.name

# Reserve the WAV path by actually creating the file; tempfile.mktemp only
# returns a name and is deprecated because another process can claim it first.
with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as temp_audio:
    audio_path = temp_audio.name

try:
    video = VideoFileClip(temp_video_path)
    try:
        if video.audio is None:
            # Fail with a readable message instead of an AttributeError on None.
            raise ValueError("the uploaded video has no audio track")
        video.audio.write_audiofile(audio_path)
    finally:
        # Release the reader so the temp file is not held open.
        video.close()
except Exception as e:
    st.error(f"Error extracting audio from video: {e}")
    # Best-effort removal of both temp files before ending the run.
    for leftover_path in (temp_video_path, audio_path):
        try:
            os.remove(leftover_path)
        except OSError:
            pass
    st.stop()
|
|
|
|
|
def transcribe_audio_in_chunks(audio_path, model, chunk_length=30):
    """Transcribe an audio file piece by piece and return the joined text.

    Args:
        audio_path: Path of the audio file, loaded with ``whisper.load_audio``.
        model: Object whose ``transcribe(samples)`` returns a mapping with a
            ``'text'`` entry.
        chunk_length: Length of each piece in seconds; must be positive.

    Returns:
        The non-empty segment texts joined by single spaces ('' when the
        audio contains no samples or nothing was recognised).

    Raises:
        ValueError: If ``chunk_length`` is not positive.
    """
    if chunk_length <= 0:
        raise ValueError(f"chunk_length must be positive, got {chunk_length!r}")

    samples = whisper.load_audio(audio_path)

    # Slice by integer sample offsets instead of converting seconds -> samples
    # per chunk; the float round-trip can land one sample short at the end.
    samples_per_chunk = max(1, int(chunk_length * whisper.audio.SAMPLE_RATE))

    texts = []
    for offset in range(0, len(samples), samples_per_chunk):
        result = model.transcribe(samples[offset:offset + samples_per_chunk])
        # Strip each piece so joining cannot produce doubled or leading spaces
        # when a segment's text carries surrounding whitespace.
        text = result['text'].strip()
        if text:
            texts.append(text)

    return ' '.join(texts)
|
|
|
|
|
def translate_in_chunks(text, translator, max_length=500):
    """Translate *text* in whitespace-delimited chunks and join the results.

    Words are packed greedily into chunks of at most ``max_length`` characters
    (counting the single spaces between words). A single word longer than
    ``max_length`` is sent as its own chunk rather than being split.

    Args:
        text: Text to translate; split on whitespace.
        translator: Object with a ``translate(chunk)`` method returning a string.
        max_length: Maximum number of characters per chunk.

    Returns:
        The translated chunks joined by single spaces ('' for empty input).
    """
    chunks = []
    current_words = []
    current_len = 0

    for word in text.split():
        # The separating space only counts when the chunk already holds a word.
        projected_len = current_len + len(word) + (1 if current_words else 0)
        if current_words and projected_len > max_length:
            chunks.append(" ".join(current_words))
            current_words = [word]
            current_len = len(word)
        else:
            # Also taken for the first word of a chunk regardless of its
            # length, so an empty chunk is never emitted to the translator.
            current_words.append(word)
            current_len = projected_len

    if current_words:
        chunks.append(" ".join(current_words))

    return ' '.join(translator.translate(chunk) for chunk in chunks)
|
|
|
|
|
def _reserve_temp_path(suffix):
    """Create an empty temporary file and return its path.

    Replaces the deprecated ``tempfile.mktemp``: the file exists as soon as
    its name is handed out, so the name cannot be claimed by anyone else.
    """
    with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as handle:
        return handle.name


def _chunk_words(text, max_length):
    """Split *text* on whitespace into pieces of at most *max_length* characters.

    A word longer than *max_length* becomes its own piece; empty pieces are
    never produced.
    """
    pieces = []
    current = ""
    for word in text.split():
        if current and len(current) + len(word) + 1 > max_length:
            pieces.append(current)
            current = word
        else:
            current = f"{current} {word}" if current else word
    if current:
        pieces.append(current)
    return pieces


def _remove_if_present(path):
    """Delete *path*, ignoring OS errors (cleanup is best-effort)."""
    try:
        os.remove(path)
    except OSError:
        pass


# Track everything that must be released afterwards *before* entering the
# try block, so cleanup never touches a name that was not assigned yet.
tts_clips = []
open_clips = []
temp_paths = [temp_video_path, audio_path]

try:
    # 1) Speech -> text.
    original_text = transcribe_audio_in_chunks(audio_path, whisper_model)
    st.write("Original Transcription:", original_text)

    # 2) Text -> target language.
    translator = Translator(to_lang=LANGUAGES[target_language])
    translated_text = translate_in_chunks(original_text, translator)
    st.write(f"Translated Text ({target_language}):", translated_text)

    if not translated_text.strip():
        # With no text there is nothing to synthesise and nothing to concatenate.
        raise ValueError("no text was produced, so there is nothing to synthesise")

    # 3) Translated text -> speech, synthesised in pieces of at most 200 characters.
    for piece in _chunk_words(translated_text, 200):
        tts_audio_path = _reserve_temp_path(".mp3")
        temp_paths.append(tts_audio_path)
        gTTS(text=piece, lang=LANGUAGES[target_language]).save(tts_audio_path)
        tts_clip = AudioFileClip(tts_audio_path)
        open_clips.append(tts_clip)
        tts_clips.append(tts_clip)

    final_audio = concatenate_audioclips(tts_clips)
    translated_audio_path = _reserve_temp_path(".mp3")
    temp_paths.append(translated_audio_path)
    final_audio.write_audiofile(translated_audio_path)

    # 4) Replace the original sound track with the synthesised one.
    final_video_path = _reserve_temp_path(".mp4")
    temp_paths.append(final_video_path)
    original_video = VideoFileClip(temp_video_path)
    open_clips.append(original_video)
    translated_audio = AudioFileClip(translated_audio_path)
    open_clips.append(translated_audio)
    final_video = original_video.set_audio(translated_audio)
    final_video.write_videofile(final_video_path, codec='libx264', audio_codec='aac')

    # 5) Show the result and offer it for download.
    st.success("Translation successful! Download your translated video below:")
    st.video(final_video_path)

    with open(final_video_path, "rb") as f:
        st.download_button("Download Translated Video", f, file_name="translated_video.mp4")

except Exception as e:
    st.error(f"Error during transcription/translation: {e}")

finally:
    # Close the clips before deleting the files they read from, then remove
    # every temp file created for this run, whether or not it succeeded.
    try:
        for open_clip in open_clips:
            open_clip.close()
    finally:
        for temp_path in temp_paths:
            _remove_if_present(temp_path)
|
|