File size: 5,681 Bytes
f89c8ce 12e5a2b f89c8ce 04676e1 f89c8ce 8eab835 f89c8ce 990b7d7 04676e1 157a5b0 4ff1681 f89c8ce 04676e1 4ff1681 04676e1 157a5b0 04676e1 8eab835 04676e1 8eab835 157a5b0 04676e1 f89c8ce 04676e1 157a5b0 f89c8ce 12e5a2b 19707c5 12e5a2b 19707c5 12e5a2b 19707c5 f89c8ce 12e5a2b f89c8ce 12e5a2b |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 |
import streamlit as st
from moviepy.editor import VideoFileClip, AudioFileClip, concatenate_audioclips
import whisper
from translate import Translator
from gtts import gTTS
import tempfile
import os
import numpy as np
# Initialize Whisper model
# NOTE(review): Streamlit re-executes the script on every interaction, so this
# load runs again each time; consider a resource cache if the installed
# Streamlit version offers one.
try:
    whisper_model = whisper.load_model("base")
except Exception as e:
    st.error(f"Error loading Whisper model: {e}")
    # Without a model nothing below can work and `whisper_model` would be
    # unbound, so end this script run here instead of failing later with a
    # NameError.
    st.stop()
# Supported target languages: display name shown in the select box mapped to
# the language code handed to the translator and to gTTS.
LANGUAGES = dict(
    English="en",
    Tamil="ta",
    Sinhala="si",
    French="fr",  # Add more languages as needed
)
st.title("AI Video Translator with Whisper and GTTS")

# Step 1: Upload video file
video_file = st.file_uploader("Upload a video file", type=["mp4", "mov", "avi", "mkv"])
if not video_file:
    # Nothing to process until a file has been uploaded; end this script run.
    st.stop()

# Step 2: Select translation language
target_language = st.selectbox("Select the target language for translation", list(LANGUAGES.keys()))

# Process only when the user clicks translate
if not st.button("Translate Video"):
    st.stop()

# Save the upload to a temporary file. Keep the upload's own extension (the
# uploader accepts mp4/mov/avi/mkv) and fall back to .mp4 when it has none.
upload_suffix = os.path.splitext(video_file.name)[1] or ".mp4"
with tempfile.NamedTemporaryFile(delete=False, suffix=upload_suffix) as temp_video:
    # getvalue() returns the whole buffer regardless of the read position.
    temp_video.write(video_file.getvalue())
    temp_video_path = temp_video.name

# Reserve the WAV path with mkstemp: unlike the deprecated mktemp, the file is
# created atomically, so the name cannot be claimed by another process.
audio_fd, audio_path = tempfile.mkstemp(suffix=".wav")
os.close(audio_fd)

# Extract audio from video
video = None
extraction_error = None
try:
    video = VideoFileClip(temp_video_path)
    if video.audio is None:
        raise ValueError("the uploaded video has no audio track")
    video.audio.write_audiofile(audio_path)
except Exception as e:
    extraction_error = e
finally:
    # Release the reader so the temporary video can be deleted or reopened.
    if video is not None:
        video.close()

if extraction_error is not None:
    st.error(f"Error extracting audio from video: {extraction_error}")
    for leftover_path in (temp_video_path, audio_path):
        if os.path.exists(leftover_path):
            os.remove(leftover_path)
    st.stop()
# Function to transcribe audio in chunks
def transcribe_audio_in_chunks(audio_path, model, chunk_length=30):
    """Transcribe an audio file piece by piece and return the joined text.

    The audio is loaded with ``whisper.load_audio`` and cut into consecutive
    windows of ``chunk_length`` seconds (the last one may be shorter). Each
    window is passed to ``model.transcribe`` and the ``'text'`` entries of the
    results are joined with single spaces.

    Args:
        audio_path: Path of the audio file to load.
        model: Object exposing ``transcribe(samples)`` that returns a mapping
            with a ``'text'`` key.
        chunk_length: Window length in seconds; must be positive.

    Returns:
        The transcription of the whole file as one string ('' when nothing
        was transcribed).

    Raises:
        ValueError: If ``chunk_length`` is not positive.
    """
    if chunk_length <= 0:
        raise ValueError("chunk_length must be positive")

    sample_rate = whisper.audio.SAMPLE_RATE
    samples = whisper.load_audio(audio_path)

    # Step in whole samples rather than fractional seconds: this avoids
    # floating-point drift at the window boundaries and cannot produce an
    # empty trailing window.
    samples_per_chunk = max(1, int(chunk_length * sample_rate))

    pieces = []
    for offset in range(0, len(samples), samples_per_chunk):
        result = model.transcribe(samples[offset:offset + samples_per_chunk])
        # Strip surrounding whitespace and drop empty results so the join
        # below does not produce doubled spaces.
        text = result['text'].strip()
        if text:
            pieces.append(text)
    return ' '.join(pieces)
# Function to translate text in chunks
def translate_in_chunks(text, translator, max_length=500):
    """Translate ``text`` in pieces of at most ``max_length`` characters.

    The text is split on whitespace and words are packed greedily into chunks
    whose length never exceeds ``max_length``. Each chunk is passed to
    ``translator.translate`` and the results are joined with single spaces.

    A single word longer than ``max_length`` (for example text written
    without spaces) is cut into ``max_length``-sized pieces so that no request
    exceeds the limit. Empty chunks are never sent to the translator.

    Args:
        text: Text to translate.
        translator: Object exposing ``translate(chunk) -> str``.
        max_length: Maximum characters per request; must be positive.
            NOTE(review): the default of 500 presumably mirrors the
            translation backend's per-request limit -- confirm.

    Returns:
        The translated chunks joined with spaces ('' for blank input).

    Raises:
        ValueError: If ``max_length`` is not positive.
    """
    if max_length <= 0:
        raise ValueError("max_length must be positive")

    chunks = []
    current = ""
    for word in text.split():
        # Hard-split words that can never fit into a single request.
        while len(word) > max_length:
            if current:
                chunks.append(current)
                current = ""
            chunks.append(word[:max_length])
            word = word[max_length:]

        if not current:
            # No separator is needed for the first word of a chunk, so a word
            # of exactly max_length characters fits on its own.
            current = word
        elif len(current) + 1 + len(word) <= max_length:
            current = f"{current} {word}"
        else:
            chunks.append(current)
            current = word

    if current:
        chunks.append(current)

    return ' '.join(translator.translate(chunk) for chunk in chunks)
# Transcribe the extracted audio, translate it, synthesize the translation as
# speech and mux that speech onto the original video. Relies on
# `audio_path`, `temp_video_path`, `target_language` and `whisper_model` set
# earlier in this script.
#
# Everything that needs releasing is declared up front so the `finally`
# clause can clean up no matter where a failure happens.
tts_paths = []
tts_clips = []
translated_audio_path = None
final_video_path = None
original_video = None
dubbed_audio = None
try:
    # Transcribe audio using Whisper
    original_text = transcribe_audio_in_chunks(audio_path, whisper_model)
    st.write("Original Transcription:", original_text)

    # Translate text to the target language
    lang_code = LANGUAGES[target_language]
    translator = Translator(to_lang=lang_code)
    translated_text = translate_in_chunks(original_text, translator)
    st.write(f"Translated Text ({target_language}):", translated_text)

    if not translated_text.strip():
        # With no text there are no clips to concatenate; fail with a clear
        # message instead of an obscure error further down.
        raise ValueError("no text was produced, so there is nothing to synthesize")

    # Split the translated text into chunks for text-to-speech
    max_length = 200  # Adjust as needed
    tts_chunks = []
    chunk = ""
    for word in translated_text.split():
        # `chunk and ...` keeps an overlong first word from flushing an
        # empty chunk, which gTTS would reject.
        if chunk and len(chunk) + len(word) + 1 > max_length:
            tts_chunks.append(chunk)
            chunk = word
        else:
            chunk = f"{chunk} {word}" if chunk else word
    if chunk:  # Keep the last chunk
        tts_chunks.append(chunk)

    # Convert each chunk to speech
    for tts_text in tts_chunks:
        # mkstemp creates the file atomically (mktemp is deprecated and racy).
        tts_fd, tts_audio_path = tempfile.mkstemp(suffix=".mp3")
        os.close(tts_fd)
        # Record the path first so it is removed even if synthesis fails.
        tts_paths.append(tts_audio_path)
        gTTS(text=tts_text, lang=lang_code).save(tts_audio_path)
        tts_clips.append(AudioFileClip(tts_audio_path))

    # Concatenate all TTS audio chunks
    final_audio = concatenate_audioclips(tts_clips)
    audio_fd, translated_audio_path = tempfile.mkstemp(suffix=".mp3")
    os.close(audio_fd)
    final_audio.write_audiofile(translated_audio_path)

    # Merge translated audio with the original video
    video_fd, final_video_path = tempfile.mkstemp(suffix=".mp4")
    os.close(video_fd)
    original_video = VideoFileClip(temp_video_path)
    dubbed_audio = AudioFileClip(translated_audio_path)
    final_video = original_video.set_audio(dubbed_audio)
    final_video.write_videofile(final_video_path, codec='libx264', audio_codec='aac')

    # Read the result into memory once so the player and the download button
    # do not depend on the temporary file, which is deleted below.
    with open(final_video_path, "rb") as f:
        final_video_bytes = f.read()

    # Display success message and provide download link
    st.success("Translation successful! Download your translated video below:")
    st.video(final_video_bytes)
    st.download_button("Download Translated Video", final_video_bytes, file_name="translated_video.mp4")
except Exception as e:
    st.error(f"Error during transcription/translation: {e}")
finally:
    # Close every clip before deleting its backing file; an open reader can
    # keep the file locked.
    for clip in (*tts_clips, dubbed_audio, original_video):
        if clip is not None:
            clip.close()

    # Clean up temporary files (best effort: report, do not crash).
    for temp_path in (*tts_paths, translated_audio_path, final_video_path, temp_video_path, audio_path):
        if temp_path and os.path.exists(temp_path):
            try:
                os.remove(temp_path)
            except OSError as cleanup_error:
                st.warning(f"Could not remove temporary file {temp_path}: {cleanup_error}")
|