import os
import gradio as gr
import moviepy.editor as mp  # requires moviepy < 2.0, where the editor module still exists
import librosa
import numpy as np
from transformers import pipeline

# Load Whisper model for speech-to-text
asr = pipeline("automatic-speech-recognition", model="openai/whisper-large")

# M2M100 for multilingual translation (reused inside generate_subtitles below)
translator = pipeline("translation", model="facebook/m2m100_418M")
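# Note: "openai/whisper-large" is heavy; smaller checkpoints such as
# "openai/whisper-small" or "openai/whisper-base" trade accuracy for speed
# and are a reasonable swap on CPU-only hardware.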
# Supported languages with their M2M100 codes
languages = {
    "Persian (fa)": "fa",
    "French (fr)": "fr",
    "Spanish (es)": "es",
    "German (de)": "de",
    "Chinese (zh)": "zh",
    "Arabic (ar)": "ar",
    "Hindi (hi)": "hi",
    "Russian (ru)": "ru"
}
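# M2M100 covers roughly 100 languages, so this dict can be extended with any
# code the tokenizer knows, e.g. "Japanese (ja)": "ja" or "Turkish (tr)": "tr".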
def generate_subtitles(video_file, language_name):
    try:
        # Map the selected language name to its M2M100 code
        target_language = languages[language_name]

        # Gradio may hand us either a file path string or a file-like object
        if isinstance(video_file, str):
            video_path = video_file
        else:
            video_path = video_file.name
        print(f"Processing video from path: {video_path}")

        # Extract the audio track from the video with moviepy
        video = mp.VideoFileClip(video_path)
        audio_path = "temp_audio.wav"
        video.audio.write_audiofile(audio_path, codec='pcm_s16le')
        video.close()
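        # Note: moviepy shells out to ffmpeg for the extraction above, so the
        # ffmpeg binary must be available on PATH (on a Hugging Face Space,
        # typically installed via packages.txt).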
print("Starting speech-to-text transcription") | |
# Load the audio file as a waveform using librosa | |
waveform, sr = librosa.load(audio_path, sr=16000) # sr=16000 for Whisper | |
# Process audio in chunks | |
chunk_duration = 30 # seconds | |
chunk_size = sr * chunk_duration # number of samples per chunk | |
transcriptions = [] | |
for i in range(0, len(waveform), chunk_size): | |
chunk = waveform[i:i + chunk_size] | |
if len(chunk) == 0: | |
break # Avoid processing empty chunks | |
# Pass the chunk to Whisper's ASR model | |
transcription = asr(chunk)["text"] | |
transcriptions.append(transcription) | |
# Combine all transcriptions into a single string | |
full_transcription = " ".join(transcriptions) | |
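        # For real subtitle files you would also need timestamps. The same
        # pipeline can return them (a sketch, not wired into this app):
        #   result = asr({"raw": chunk, "sampling_rate": sr}, return_timestamps=True)
        #   # result["chunks"] -> [{"timestamp": (start, end), "text": ...}, ...]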
print("Starting translation") | |
# Translate transcription to the target language using M2M100 | |
translation_pipeline = pipeline('translation', model='facebook/m2m100_418M') | |
translated_subtitles = translation_pipeline( | |
full_transcription, | |
forced_bos_token_id=translation_pipeline.tokenizer.get_lang_id(target_language) | |
)[0]["translation_text"] | |
# Return subtitles | |
subtitles = f"Original: {full_transcription}\nTranslated: {translated_subtitles}" | |
return subtitles | |
    except Exception as e:
        # Catch and log the error
        print(f"Error occurred: {e}")
        return f"Error occurred: {e}"
# Gradio callback: thin wrapper so any error surfaces as output text
def subtitle_video(video_file, language_name):
    try:
        # Handles both file-like objects and file paths
        return generate_subtitles(video_file, language_name)
    except Exception as e:
        print(f"Error in processing video: {e}")
        return f"Error in processing video: {e}"
# Gradio app layout
interface = gr.Interface(
    fn=subtitle_video,
    inputs=[
        gr.Video(label="Upload Video"),
        gr.Dropdown(  # dropdown for language selection
            label="Choose Target Language",
            choices=list(languages.keys()),  # display language names
            value="Persian (fa)"  # default language
        )
    ],
    outputs="text",
    title="Automatic Video Subtitler & Translator"
)

interface.launch()
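# To run locally (a sketch; versions are assumptions, not pins from this repo):
#   pip install gradio "moviepy<2" librosa numpy transformers sentencepiece torch
#   python app.py
# moviepy is capped below 2.0 because the moviepy.editor module used above was
# removed in 2.x; sentencepiece is needed by the M2M100 tokenizer.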