# DAI_Project / app.py
import gradio as gr
import moviepy.editor as mp
import librosa
import numpy as np
from transformers import pipeline
from concurrent.futures import ThreadPoolExecutor
import tempfile
# Load Whisper model for speech-to-text
asr = pipeline("automatic-speech-recognition", model="openai/whisper-large")
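# Note: whisper-large is a heavy checkpoint; a smaller one such as
# "openai/whisper-small" could be swapped in for faster (e.g. CPU-only) inference.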
# M2M100 for multilingual translation (English transcription to the selected target language)
translator = pipeline("translation", model="facebook/m2m100_418M")
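# M2M100 covers ~100 languages; the translation pipeline takes ISO 639-1 codes
# via src_lang / tgt_lang (see the call in generate_subtitles below).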
# Supported languages with their codes
languages = {
"Persian (fa)": "fa",
"French (fr)": "fr",
"Spanish (es)": "es",
"German (de)": "de",
"Chinese (zh)": "zh",
"Arabic (ar)": "ar",
"Hindi (hi)": "hi",
"Russian (ru)": "ru"
}
def transcribe_audio(chunk):
"""Transcribe a single audio chunk."""
return asr(chunk)["text"]
def generate_subtitles(video_file, language_name):
try:
# Extract the target language code from the selected language name
target_language = languages[language_name]
# Check if video_file is a file object or a file path string
if isinstance(video_file, str):
video_path = video_file # It's a file path
else:
video_path = video_file.name # It's a file object
print(f"Processing video from path: {video_path}")
# Load the video and extract audio directly
video = mp.VideoFileClip(video_path)
audio = video.audio
# Use a temporary file to hold the audio data
        # Use a temporary WAV file to hold the extracted audio (the .wav suffix lets ffmpeg pick the container)
        with tempfile.NamedTemporaryFile(suffix=".wav", delete=True) as tmp_audio_file:
            audio.write_audiofile(tmp_audio_file.name, codec='pcm_s16le')
print("Starting speech-to-text transcription")
# Load the audio file as a waveform using librosa
waveform, sr = librosa.load(tmp_audio_file.name, sr=16000) # sr=16000 for Whisper
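            # librosa.load also downmixes to mono float32, which matches the raw
            # input format the Whisper pipeline expects.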
# Process audio in chunks
chunk_duration = 15 # seconds
chunk_size = sr * chunk_duration # number of samples per chunk
chunks = [waveform[i:i + chunk_size] for i in range(0, len(waveform), chunk_size) if len(waveform[i:i + chunk_size]) > 0]
# Use ThreadPoolExecutor for parallel processing
with ThreadPoolExecutor() as executor:
transcriptions = list(executor.map(transcribe_audio, chunks))
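            # Caveat: all worker threads share the single `asr` pipeline instance, so on
            # one GPU (or a GIL-bound CPU) the speedup from threading may be limited.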
# Combine all transcriptions into a single string
full_transcription = " ".join(transcriptions)
print("Starting translation")
# Translate transcription to the target language using M2M100
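            # Note: src_lang="en" assumes the speech (and hence the transcription) is
            # English; audio in another language would need a different source code here.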
translated_subtitles = translator(
full_transcription,
src_lang="en", # Source language is English
tgt_lang=target_language # Target language from user selection
)[0]["translation_text"]
# Return subtitles
subtitles = f"Original: {full_transcription}\nTranslated: {translated_subtitles}"
return subtitles
except Exception as e:
# Catch and log the error
print(f"Error occurred: {e}")
return f"Error occurred: {e}"
# Define Gradio interface
def subtitle_video(video_file, language_name):
try:
        # Delegate to generate_subtitles, which handles both file paths and file objects
return generate_subtitles(video_file, language_name)
except Exception as e:
print(f"Error in processing video: {e}")
return f"Error in processing video: {e}"
# Gradio app layout
interface = gr.Interface(
fn=subtitle_video,
inputs=[
gr.Video(label="Upload Video"),
gr.Dropdown( # Dropdown for language selection
label="Choose Target Language",
choices=list(languages.keys()), # Display language names in the dropdown
value="Persian (fa)" # Default language
)
],
outputs="text",
title="Automatic Video Subtitler & Translator"
)
interface.launch()