Spaces:

Artificial-superintelligence
/

Aita

Running

App Files Files Community

Aita / app.py

Artificial-superintelligence

Update app.py

12e5a2b verified 7 months ago

raw

history blame

5.68 kB

	import streamlit as st
	from moviepy.editor import VideoFileClip, AudioFileClip, concatenate_audioclips
	import whisper
	from translate import Translator
	from gtts import gTTS
	import tempfile
	import os
	import numpy as np

	# Initialize Whisper model
	try:
	whisper_model = whisper.load_model("base")
	except Exception as e:
	st.error(f"Error loading Whisper model: {e}")

	# Language options
	LANGUAGES = {
	'English': 'en',
	'Tamil': 'ta',
	'Sinhala': 'si',
	'French': 'fr', # Add more languages as needed
	}

	st.title("AI Video Translator with Whisper and GTTS")

	# Step 1: Upload video file
	video_file = st.file_uploader("Upload a video file", type=["mp4", "mov", "avi", "mkv"])

	if video_file:
	# Step 2: Select translation language
	target_language = st.selectbox("Select the target language for translation", list(LANGUAGES.keys()))

	# Process when user clicks translate
	if st.button("Translate Video"):
	# Save video to a temporary file
	with tempfile.NamedTemporaryFile(delete=False, suffix='.mp4') as temp_video:
	temp_video.write(video_file.read())
	temp_video_path = temp_video.name

	# Extract audio from video
	try:
	video = VideoFileClip(temp_video_path)
	audio_path = tempfile.mktemp(suffix=".wav")
	video.audio.write_audiofile(audio_path)
	except Exception as e:
	st.error(f"Error extracting audio from video: {e}")
	os.remove(temp_video_path)
	st.stop()

	# Function to transcribe audio in chunks
	def transcribe_audio_in_chunks(audio_path, model, chunk_length=30):
	audio_clip = whisper.load_audio(audio_path)
	audio_duration = len(audio_clip) / whisper.audio.SAMPLE_RATE # Calculate duration in seconds
	segments = []

	for start in np.arange(0, audio_duration, chunk_length):
	end = min(start + chunk_length, audio_duration)
	segment = audio_clip[int(start * whisper.audio.SAMPLE_RATE):int(end * whisper.audio.SAMPLE_RATE)]
	result = model.transcribe(segment)
	segments.append(result['text'])

	return ' '.join(segments)

	# Function to translate text in chunks
	def translate_in_chunks(text, translator, max_length=500):
	words = text.split()
	chunks = []
	current_chunk = ""

	for word in words:
	if len(current_chunk) + len(word) + 1 <= max_length:
	current_chunk += " " + word if current_chunk else word
	else:
	chunks.append(current_chunk)
	current_chunk = word

	if current_chunk:
	chunks.append(current_chunk)

	translated_chunks = [translator.translate(chunk) for chunk in chunks]
	return ' '.join(translated_chunks)

	# Transcribe audio using Whisper
	try:
	original_text = transcribe_audio_in_chunks(audio_path, whisper_model)
	st.write("Original Transcription:", original_text)

	# Translate text to the target language
	translator = Translator(to_lang=LANGUAGES[target_language])
	translated_text = translate_in_chunks(original_text, translator)
	st.write(f"Translated Text ({target_language}):", translated_text)

	# Convert translated text to speech in chunks
	tts_clips = []
	words = translated_text.split()
	chunk = ""
	max_length = 200 # Adjust as needed

	for word in words:
	if len(chunk) + len(word) + 1 > max_length:
	tts = gTTS(text=chunk, lang=LANGUAGES[target_language])
	tts_audio_path = tempfile.mktemp(suffix=".mp3")
	tts.save(tts_audio_path)
	tts_clips.append(AudioFileClip(tts_audio_path))
	chunk = word
	else:
	chunk += " " + word if chunk else word

	if chunk: # Process last chunk
	tts = gTTS(text=chunk, lang=LANGUAGES[target_language])
	tts_audio_path = tempfile.mktemp(suffix=".mp3")
	tts.save(tts_audio_path)
	tts_clips.append(AudioFileClip(tts_audio_path))

	# Concatenate all TTS audio chunks
	final_audio = concatenate_audioclips(tts_clips)
	translated_audio_path = tempfile.mktemp(suffix=".mp3")
	final_audio.write_audiofile(translated_audio_path)

	# Merge translated audio with the original video
	final_video_path = tempfile.mktemp(suffix=".mp4")
	original_video = VideoFileClip(temp_video_path)
	final_video = original_video.set_audio(AudioFileClip(translated_audio_path))
	final_video.write_videofile(final_video_path, codec='libx264', audio_codec='aac')

	# Display success message and provide download link
	st.success("Translation successful! Download your translated video below:")
	st.video(final_video_path)

	# Provide download link
	with open(final_video_path, "rb") as f:
	st.download_button("Download Translated Video", f, file_name="translated_video.mp4")

	except Exception as e:
	st.error(f"Error during transcription/translation: {e}")

	# Clean up temporary files
	for clip in tts_clips:
	os.remove(clip.filename)
	os.remove(temp_video_path)
	os.remove(audio_path)
	os.remove(translated_audio_path)
	os.remove(final_video_path)