# Aita / app.py
# Source: Hugging Face Space by Artificial-superintelligence (commit 19707c5, 4.9 kB)
import streamlit as st
from moviepy.editor import VideoFileClip, AudioFileClip
import whisper
from translate import Translator
from gtts import gTTS
import tempfile
import os
import numpy as np
# Load the Whisper speech-to-text model once at startup. "base" trades a
# little accuracy for a small download; a load failure is fatal for the app.
try:
    whisper_model = whisper.load_model("base")
except Exception as e:
    st.error(f"Error loading Whisper model: {e}")
    # Bug fix: without this, execution continued and `whisper_model` was
    # undefined, causing a NameError at transcription time.
    st.stop()

# Supported target languages: display name -> ISO 639-1 code
# (the codes are shared by the `translate` package and gTTS).
LANGUAGES = {
    'English': 'en',
    'Tamil': 'ta',
    'Sinhala': 'si',
    'French': 'fr',  # Add more languages as needed
}
st.title("AI Video Translator with Whisper and GTTS")

# Step 1: upload the source video.
video_file = st.file_uploader("Upload a video file", type=["mp4", "mov", "avi", "mkv"])

if video_file:
    # Step 2: select the translation target language.
    target_language = st.selectbox("Select the target language for translation", list(LANGUAGES.keys()))

    # Run the pipeline only on explicit user action.
    if st.button("Translate Video"):
        # Persist the upload to disk: moviepy needs a real file path.
        with tempfile.NamedTemporaryFile(delete=False, suffix='.mp4') as temp_video:
            temp_video.write(video_file.read())
            temp_video_path = temp_video.name

        # Extract the audio track to WAV for Whisper.
        try:
            video = VideoFileClip(temp_video_path)
            # NamedTemporaryFile instead of the deprecated, race-prone
            # tempfile.mktemp; we only need the reserved path.
            with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp_audio:
                audio_path = tmp_audio.name
            video.audio.write_audiofile(audio_path)
            # Release moviepy's file readers; the clip is re-opened later
            # for the final remux.
            video.close()
        except Exception as e:
            st.error(f"Error extracting audio from video: {e}")
            os.remove(temp_video_path)
            st.stop()
# Function to transcribe audio in chunks
def transcribe_audio_in_chunks(audio_path, model, chunk_length=30):
    """Transcribe an audio file by running the model over fixed windows.

    Args:
        audio_path: path to an audio file readable by ``whisper.load_audio``.
        model: a loaded Whisper model.
        chunk_length: window length in seconds (default 30).

    Returns:
        The per-window transcriptions joined with single spaces.
    """
    sample_rate = whisper.audio.SAMPLE_RATE
    samples = whisper.load_audio(audio_path)
    total_seconds = len(samples) / sample_rate

    pieces = []
    start = 0.0
    while start < total_seconds:
        stop = min(start + chunk_length, total_seconds)
        window = samples[int(start * sample_rate):int(stop * sample_rate)]
        pieces.append(model.transcribe(window)['text'])
        start += chunk_length
    return ' '.join(pieces)
# Function to translate text in chunks
def translate_in_chunks(text, translator, max_length=500):
    """Translate ``text`` in pieces small enough for the backend.

    Words are greedily packed into chunks of at most ``max_length``
    characters (counting a joining space per word), each chunk is
    translated separately, and the results are joined with spaces.
    """
    chunks = []
    buffer = ""
    for token in text.split():
        if len(buffer) + len(token) + 1 <= max_length:
            buffer = f"{buffer} {token}" if buffer else token
        else:
            chunks.append(buffer)
            buffer = token
    if buffer:
        chunks.append(buffer)
    return ' '.join(translator.translate(piece) for piece in chunks)
# Transcribe audio using Whisper
try:
original_text = transcribe_audio_in_chunks(audio_path, whisper_model)
st.write("Original Transcription:", original_text)
# Translate text to the target language
translator = Translator(to_lang=LANGUAGES[target_language])
translated_text = translate_in_chunks(original_text, translator)
st.write(f"Translated Text ({target_language}):", translated_text)
# Convert translated text to speech
tts = gTTS(text=translated_text, lang=LANGUAGES[target_language])
translated_audio_path = tempfile.mktemp(suffix=".mp3")
tts.save(translated_audio_path)
# Merge translated audio with the original video
final_video_path = tempfile.mktemp(suffix=".mp4")
original_video = VideoFileClip(temp_video_path)
translated_audio = AudioFileClip(translated_audio_path)
final_video = original_video.set_audio(translated_audio)
final_video.write_videofile(final_video_path, codec='libx264', audio_codec='aac')
# Display success message and provide download link
st.success("Translation successful! Download your translated video below:")
st.video(final_video_path)
# Provide download link
with open(final_video_path, "rb") as f:
st.download_button("Download Translated Video", f, file_name="translated_video.mp4")
except Exception as e:
st.error(f"Error during transcription/translation: {e}")
translated_audio_path = None # Ensure this variable is defined
# Clean up temporary files
os.remove(temp_video_path)
os.remove(audio_path)
if translated_audio_path: # Only remove if it was created
os.remove(translated_audio_path)
if final_video_path: # Only remove if it was created
os.remove(final_video_path)