# Aita/app.py
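#
# A Streamlit app: extract a video's audio track, transcribe it with OpenAI
# Whisper, translate the transcript, and synthesize speech with gTTS.
#
# Assumed setup (package names inferred from the imports, not pinned by this
# repo; note that `moviepy.editor` was removed in moviepy 2.x):
#   pip install streamlit "moviepy<2" openai-whisper translate gTTS
#   streamlit run app.py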
import streamlit as st
from moviepy.editor import VideoFileClip
import whisper
from translate import Translator
from gtts import gTTS
import tempfile
import os
import numpy as np
# Initialize the Whisper model once at startup
try:
    whisper_model = whisper.load_model("base")
except Exception as e:
    st.error(f"Whisper model could not be loaded ({e}). "
             "Ensure openai-whisper is installed (e.g. pip install openai-whisper).")
    st.stop()  # Without a model the rest of the app cannot run
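# Whisper ships multiple checkpoints (tiny, base, small, medium, large);
# "base" is a reasonable speed/accuracy trade-off for CPU-only hosting.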
# Language options. Each code must be accepted by both the `translate`
# package and gTTS (two-letter ISO 639-1 codes work for both here).
LANGUAGES = {
    'English': 'en',
    'Tamil': 'ta',
    'Sinhala': 'si',
    'French': 'fr',  # Add more languages as needed
}
st.title("AI Video Translator with Whisper and gTTS")
# Step 1: Upload video file
video_file = st.file_uploader("Upload a video file", type=["mp4", "mov", "avi", "mkv"])

if video_file:
    # Step 2: Select translation language
    target_language = st.selectbox("Select the target language for translation", list(LANGUAGES.keys()))

    # Process when the user clicks translate
    if st.button("Translate Video"):
        # Save the upload to disk; delete=False keeps the file alive after the
        # with-block so moviepy can reopen it by path.
        with tempfile.NamedTemporaryFile(delete=False, suffix='.mp4') as temp_video:
            temp_video.write(video_file.read())
            temp_video_path = temp_video.name
        # Extract the audio track from the video
        try:
            video = VideoFileClip(temp_video_path)
            if video.audio is None:
                raise ValueError("The uploaded video has no audio track.")
            audio_path = tempfile.mktemp(suffix=".wav")
            video.audio.write_audiofile(audio_path)
            video.close()  # Release the underlying file handle before cleanup
        except Exception as e:
            st.error(f"Error extracting audio from video: {e}")
            os.remove(temp_video_path)
            st.stop()
        # Transcribe long audio in fixed-length chunks to bound memory use.
        # Note: SAMPLE_RATE lives in whisper.audio, not at the package top level.
        def transcribe_audio_in_chunks(audio_path, model, chunk_length=30):
            audio_clip = whisper.load_audio(audio_path)  # 16 kHz mono float32 array
            sample_rate = whisper.audio.SAMPLE_RATE
            audio_duration = len(audio_clip) / sample_rate  # Duration in seconds
            segments = []
            for start in np.arange(0, audio_duration, chunk_length):
                end = min(start + chunk_length, audio_duration)
                segment = audio_clip[int(start * sample_rate):int(end * sample_rate)]
                result = model.transcribe(segment)
                segments.append(result['text'])
            return ' '.join(segments)
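        # Caveat: fixed 30 s boundaries can cut words mid-utterance, so small
        # errors at chunk seams are expected; a silence-based splitter would be
        # more accurate at the cost of extra dependencies.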
        # Transcribe audio using Whisper
        try:
            original_text = transcribe_audio_in_chunks(audio_path, whisper_model)
            st.write("Original Transcription:", original_text)

            # Translate the transcript into the target language.
            # Caveat: the `translate` package's default MyMemory backend rejects
            # long requests (roughly 500 characters), so long videos may fail here.
            translator = Translator(to_lang=LANGUAGES[target_language])
            translated_text = translator.translate(original_text)
            st.write(f"Translated Text ({target_language}):", translated_text)

            # Convert the translated text to speech
            tts = gTTS(text=translated_text, lang=LANGUAGES[target_language])
            audio_output_path = tempfile.mktemp(suffix=".mp3")
            tts.save(audio_output_path)

            # Display translated text and audio
            st.success("Translation successful!")
            st.audio(audio_output_path, format="audio/mp3")
        except Exception as e:
            st.error(f"Error during transcription/translation: {e}")
            audio_output_path = None  # Keep the name defined for cleanup below
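        # st.audio reads the file contents when it is called, so deleting the
        # MP3 during cleanup below does not break playback.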
        # Clean up temporary files
        os.remove(temp_video_path)
        os.remove(audio_path)
        if audio_output_path:  # Only remove if it was created
            os.remove(audio_output_path)