Aita / app.py
Artificial-superintelligence's picture
Update app.py
12e5a2b verified
raw
history blame
5.68 kB
import streamlit as st
from moviepy.editor import VideoFileClip, AudioFileClip, concatenate_audioclips
import whisper
from translate import Translator
from gtts import gTTS
import tempfile
import os
import numpy as np
# Initialize Whisper model.
# The rest of the script needs `whisper_model`; if loading fails the name is
# never bound, so report the problem and end this script run instead of
# failing later with a NameError.
try:
    whisper_model = whisper.load_model("base")
except Exception as e:
    st.error(f"Error loading Whisper model: {e}")
    st.stop()
# Target languages offered in the UI: display name -> language code.
# The code is handed to both the translator and gTTS further down.
# Add more languages as needed.
LANGUAGES = {
    "English": "en",
    "Tamil": "ta",
    "Sinhala": "si",
    "French": "fr",
}
st.title("AI Video Translator with Whisper and GTTS")

# Step 1: Upload video file
video_file = st.file_uploader("Upload a video file", type=["mp4", "mov", "avi", "mkv"])
if not video_file:
    # Nothing to process until a file has been uploaded; end this run here.
    st.stop()

# Step 2: Select translation language
target_language = st.selectbox("Select the target language for translation", list(LANGUAGES.keys()))

# Process only when the user clicks translate
if not st.button("Translate Video"):
    st.stop()

# Save the upload to a temporary file, keeping the uploaded file's own
# extension (falling back to .mp4) so the suffix matches the container.
video_suffix = os.path.splitext(video_file.name)[1] or ".mp4"
with tempfile.NamedTemporaryFile(delete=False, suffix=video_suffix) as temp_video:
    temp_video.write(video_file.read())
    temp_video_path = temp_video.name

# Reserve the .wav path with mkstemp (mktemp is deprecated and race-prone);
# only the path is needed, so the descriptor is closed straight away.
audio_fd, audio_path = tempfile.mkstemp(suffix=".wav")
os.close(audio_fd)

# Extract audio from video
video = None
extraction_error = None
try:
    video = VideoFileClip(temp_video_path)
    if video.audio is None:
        raise ValueError("the uploaded video has no audio track")
    video.audio.write_audiofile(audio_path)
except Exception as e:
    extraction_error = e
finally:
    # Release the clip before any file is deleted; it is not needed again
    # because the video is reopened later for the final merge.
    if video is not None:
        video.close()

if extraction_error is not None:
    st.error(f"Error extracting audio from video: {extraction_error}")
    for leftover in (temp_video_path, audio_path):
        try:
            os.remove(leftover)
        except OSError:
            # Best-effort cleanup: the file may already be gone.
            pass
    st.stop()
# Function to transcribe audio in chunks
def transcribe_audio_in_chunks(audio_path, model, chunk_length=30):
    """Transcribe an audio file piece by piece and return the joined text.

    The audio is loaded with ``whisper.load_audio`` and cut into consecutive
    windows of ``chunk_length`` seconds (the last window may be shorter).
    Each window is passed to ``model.transcribe`` and the ``'text'`` entries
    of the results are joined with single spaces.

    Args:
        audio_path: Path of the audio file to load.
        model: Object exposing ``transcribe(audio)`` that returns a mapping
            with a ``'text'`` key (the loaded Whisper model in this script).
        chunk_length: Window length in seconds; must be positive.

    Returns:
        The transcription as one string; empty if no window produced text.

    Raises:
        ValueError: If ``chunk_length`` does not cover at least one sample.
    """
    audio = whisper.load_audio(audio_path)

    # Step in whole samples so slice bounds never depend on float rounding.
    samples_per_chunk = int(chunk_length * whisper.audio.SAMPLE_RATE)
    if samples_per_chunk <= 0:
        raise ValueError("chunk_length must be positive")

    texts = []
    for offset in range(0, len(audio), samples_per_chunk):
        result = model.transcribe(audio[offset:offset + samples_per_chunk])
        # Strip each piece so the join does not produce doubled spaces, and
        # skip windows that produced no text at all.
        text = result['text'].strip()
        if text:
            texts.append(text)
    return ' '.join(texts)
# Function to translate text in chunks
def translate_in_chunks(text, translator, max_length=500):
    """Translate ``text`` in word-aligned chunks of at most ``max_length`` chars.

    Words are packed greedily into chunks, each chunk is passed to
    ``translator.translate`` and the results are joined with single spaces.

    Args:
        text: Text to translate; it is split on whitespace.
        translator: Object exposing ``translate(str) -> str``.
        max_length: Maximum chunk length in characters. A single word longer
            than this is not split; it is sent as a chunk of its own.

    Returns:
        The translated chunks joined by spaces; ``''`` for empty input.
    """
    chunks = []
    current_chunk = ""
    for word in text.split():
        candidate = f"{current_chunk} {word}" if current_chunk else word
        if current_chunk and len(candidate) > max_length:
            chunks.append(current_chunk)
            current_chunk = word
        else:
            # Also taken for an over-long first word: there is nothing to
            # flush yet, so no empty chunk is ever sent to the translator.
            current_chunk = candidate
    if current_chunk:
        chunks.append(current_chunk)

    translated_chunks = [translator.translate(chunk) for chunk in chunks]
    return ' '.join(translated_chunks)
# Transcribe, translate, synthesize speech and merge it back into the video.
# Every temp file and clip is registered the moment it is created so the
# ``finally`` clause can release it wherever the pipeline fails.


def _new_temp_path(suffix):
    """Create an empty, uniquely named temp file and return its path."""
    # mkstemp instead of the deprecated, race-prone mktemp; only the path is
    # needed, so the descriptor is closed immediately.
    fd, path = tempfile.mkstemp(suffix=suffix)
    os.close(fd)
    return path


def _split_for_tts(text, max_length):
    """Pack the words of ``text`` into pieces of at most ``max_length`` chars.

    A single word longer than ``max_length`` becomes a piece of its own.
    Empty pieces are never produced.
    """
    pieces = []
    current = ""
    for word in text.split():
        candidate = f"{current} {word}" if current else word
        if current and len(candidate) > max_length:
            pieces.append(current)
            current = word
        else:
            current = candidate
    if current:
        pieces.append(current)
    return pieces


temp_paths = [temp_video_path, audio_path]
open_clips = []
try:
    target_code = LANGUAGES[target_language]

    # Transcribe audio using Whisper
    original_text = transcribe_audio_in_chunks(audio_path, whisper_model)
    st.write("Original Transcription:", original_text)

    # Translate text to the target language
    # NOTE(review): no source language is passed, so the translate library's
    # default applies -- confirm this matches the language of the upload.
    translator = Translator(to_lang=target_code)
    translated_text = translate_in_chunks(original_text, translator)
    st.write(f"Translated Text ({target_language}):", translated_text)

    # Convert translated text to speech in chunks
    max_length = 200  # Adjust as needed
    tts_clips = []
    for piece in _split_for_tts(translated_text, max_length):
        tts_audio_path = _new_temp_path(".mp3")
        temp_paths.append(tts_audio_path)
        gTTS(text=piece, lang=target_code).save(tts_audio_path)
        tts_clip = AudioFileClip(tts_audio_path)
        open_clips.append(tts_clip)
        tts_clips.append(tts_clip)
    if not tts_clips:
        # Fail with a clear message rather than concatenating nothing.
        raise ValueError("no text was produced, so there is nothing to speak")

    # Concatenate all TTS audio chunks
    final_audio = concatenate_audioclips(tts_clips)
    open_clips.append(final_audio)
    translated_audio_path = _new_temp_path(".mp3")
    temp_paths.append(translated_audio_path)
    final_audio.write_audiofile(translated_audio_path)

    # Merge translated audio with the original video
    final_video_path = _new_temp_path(".mp4")
    temp_paths.append(final_video_path)
    original_video = VideoFileClip(temp_video_path)
    open_clips.append(original_video)
    translated_audio = AudioFileClip(translated_audio_path)
    open_clips.append(translated_audio)
    final_video = original_video.set_audio(translated_audio)
    open_clips.append(final_video)
    final_video.write_videofile(final_video_path, codec='libx264', audio_codec='aac')

    # Display success message and provide download link
    st.success("Translation successful! Download your translated video below:")
    st.video(final_video_path)
    with open(final_video_path, "rb") as f:
        st.download_button("Download Translated Video", f, file_name="translated_video.mp4")
except Exception as e:
    st.error(f"Error during transcription/translation: {e}")
finally:
    # Clean up: close clips first so no file is still held open when it is
    # deleted, then remove every temp file that was registered.
    for clip in reversed(open_clips):
        try:
            clip.close()
        except Exception:
            # Best-effort: a failed close must not block file removal.
            pass
    for path in temp_paths:
        try:
            os.remove(path)
        except OSError:
            # Best-effort: the file may already be gone or still locked.
            pass