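"""Automatic video subtitler & translator (Gradio app).

Pipeline: extract the audio track from an uploaded video, transcribe it
with Whisper, translate the transcript with M2M100, and display both.
"""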
import gradio as gr
import moviepy.editor as mp
import librosa
from transformers import pipeline
from concurrent.futures import ThreadPoolExecutor
import tempfile
# Load Whisper model for speech-to-text
asr = pipeline("automatic-speech-recognition", model="openai/whisper-large")
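# Note: whisper-large is a multi-GB download; a smaller checkpoint such as
# "openai/whisper-small" is a drop-in alternative on constrained hardware.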
# M2M100 for multilingual translation (language pair is set per call below)
translator = pipeline("translation", model="facebook/m2m100_418M")
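# M2M100 takes ISO 639-1 language codes (e.g. "fa", "zh"), which is what the
# `languages` dict below maps the display names onto.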
# Supported languages with their codes
languages = {
"Persian (fa)": "fa",
"French (fr)": "fr",
"Spanish (es)": "es",
"German (de)": "de",
"Chinese (zh)": "zh",
"Arabic (ar)": "ar",
"Hindi (hi)": "hi",
"Russian (ru)": "ru"
}
def transcribe_audio(chunk):
    """Transcribe a single 16 kHz mono waveform chunk."""
    # The ASR pipeline accepts a raw array plus its sampling rate; librosa
    # resamples everything to 16 kHz before the chunks are built.
    return asr({"raw": chunk, "sampling_rate": 16000})["text"]
def generate_subtitles(video_file, language_name):
try:
# Extract the target language code from the selected language name
target_language = languages[language_name]
# Check if video_file is a file object or a file path string
if isinstance(video_file, str):
video_path = video_file # It's a file path
else:
video_path = video_file.name # It's a file object
print(f"Processing video from path: {video_path}")
        # Load the video and extract its audio track
        video = mp.VideoFileClip(video_path)
        audio = video.audio
        if audio is None:
            return "Error: the uploaded video has no audio track."
        # Write the audio to a temporary 16-bit PCM WAV file
        with tempfile.NamedTemporaryFile(delete=True, suffix='.wav') as tmp_audio_file:
            audio.write_audiofile(tmp_audio_file.name, codec='pcm_s16le')
print("Starting speech-to-text transcription")
# Load the audio file as a waveform using librosa
waveform, sr = librosa.load(tmp_audio_file.name, sr=16000) # sr=16000 for Whisper
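            # librosa.load returns mono float32 audio and resamples to the
            # requested rate, matching Whisper's expected 16 kHz input.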
            # Split the waveform into fixed-length chunks for transcription
            # (hard 15 s cuts can split words at boundaries; an overlap or
            # silence-based split would be more robust)
            chunk_duration = 15  # seconds
            chunk_size = sr * chunk_duration  # samples per chunk
            chunks = [waveform[i:i + chunk_size] for i in range(0, len(waveform), chunk_size)]
            # Transcribe chunks in parallel; executor.map preserves chunk
            # order. All workers share one pipeline and CPU inference is
            # largely GIL-bound, so the speedup here is modest.
            with ThreadPoolExecutor() as executor:
                transcriptions = list(executor.map(transcribe_audio, chunks))
# Combine all transcriptions into a single string
full_transcription = " ".join(transcriptions)
print("Starting translation")
# Translate transcription to the target language using M2M100
translated_subtitles = translator(
full_transcription,
src_lang="en", # Source language is English
tgt_lang=target_language # Target language from user selection
)[0]["translation_text"]
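            # Caveat: M2M100 has a fixed maximum input length, so very long
            # transcriptions may be cut off; translating sentence by sentence
            # and re-joining the results is safer.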
# Return subtitles
subtitles = f"Original: {full_transcription}\nTranslated: {translated_subtitles}"
return subtitles
except Exception as e:
# Catch and log the error
print(f"Error occurred: {e}")
return f"Error occurred: {e}"
# Gradio callback: thin wrapper so any error surfaces in the text output
def subtitle_video(video_file, language_name):
    try:
        return generate_subtitles(video_file, language_name)
    except Exception as e:
        print(f"Error in processing video: {e}")
        return f"Error in processing video: {e}"
# Gradio app layout
interface = gr.Interface(
fn=subtitle_video,
inputs=[
gr.Video(label="Upload Video"),
gr.Dropdown( # Dropdown for language selection
label="Choose Target Language",
choices=list(languages.keys()), # Display language names in the dropdown
value="Persian (fa)" # Default language
)
],
outputs="text",
title="Automatic Video Subtitler & Translator"
)
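# launch() serves the app locally (Gradio's default is http://127.0.0.1:7860);
# on Hugging Face Spaces this same entry point is picked up automatically.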
interface.launch()