import gradio as gr
import moviepy.editor as mp
from transformers import pipeline

# Load Whisper model for speech-to-text
# (chunk_length_s lets the pipeline transcribe audio longer than 30 seconds)
asr = pipeline("automatic-speech-recognition", model="openai/whisper-large", chunk_length_s=30)

# M2M100 for multilingual translation
translator = pipeline("translation", model="facebook/m2m100_418M")
def generate_subtitles(video_path, target_language):
    # Extract audio from the video
    video = mp.VideoFileClip(video_path)
    audio = video.audio
    audio.write_audiofile("temp_audio.wav", codec="pcm_s16le")

    # Convert speech to text (ASR using Whisper); the pipeline accepts a file path directly
    transcription = asr("temp_audio.wav")["text"]
    # Translate the transcription into the target language using M2M100.
    # forced_bos_token_id tells the decoder which language to generate in;
    # the source language is left at the tokenizer's default ("en"), so
    # non-English audio would also need translator.tokenizer.src_lang to be set.
    translated_subtitles = translator(
        transcription,
        forced_bos_token_id=translator.tokenizer.get_lang_id(target_language),
    )[0]["translation_text"]

    # Return subtitles (plain text for now)
    subtitles = f"Original: {transcription}\nTranslated: {translated_subtitles}"
    return subtitles
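
# Sketch (not part of the original app): if the ASR call above is made with
# return_timestamps=True, e.g. asr("temp_audio.wav", return_timestamps=True),
# the result also carries a "chunks" list of {"timestamp": (start, end), "text": ...}
# entries. A hypothetical helper like chunks_to_srt could turn those chunks into
# a proper .srt subtitle file instead of the plain-text output returned above.
def chunks_to_srt(chunks):
    def fmt(seconds):
        # SRT timestamps use the form HH:MM:SS,mmm
        ms = int((seconds - int(seconds)) * 1000)
        s = int(seconds)
        return f"{s // 3600:02d}:{(s % 3600) // 60:02d}:{s % 60:02d},{ms:03d}"

    entries = []
    for i, chunk in enumerate(chunks, start=1):
        start, end = chunk["timestamp"]
        end = end if end is not None else start  # the final end timestamp can be missing
        entries.append(f"{i}\n{fmt(start)} --> {fmt(end)}\n{chunk['text'].strip()}\n")
    return "\n".join(entries)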

# Gradio wrapper: newer Gradio versions pass the uploaded video as a filepath
# string, while older ones pass a temp-file object, so handle both.
def subtitle_video(video_file, target_language):
    video_path = video_file if isinstance(video_file, str) else video_file.name
    return generate_subtitles(video_path, target_language)

# Gradio app layout
interface = gr.Interface(
    fn=subtitle_video,
    inputs=[
        gr.Video(label="Upload Video"),
        gr.Dropdown(  # Dropdown for language selection
            label="Choose Target Language",
            choices=[
                "fa",  # Persian
                "fr",  # French
                "es",  # Spanish
                "de",  # German
                "zh",  # Chinese
                "ar",  # Arabic
                "hi",  # Hindi
                "ru",  # Russian
            ],
            value="fa",  # Default to Persian
        ),
    ],
    outputs="text",
    title="Automatic Video Subtitler & Translator",
)

interface.launch()
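
# Example usage (an assumption, not part of the original Space): once the app is
# running, it can be queried programmatically with gradio_client. Depending on the
# gradio_client version, the video argument may need to be wrapped with
# handle_file("sample.mp4") instead of being passed as a plain path.
#
#   from gradio_client import Client
#   client = Client("http://127.0.0.1:7860/")
#   result = client.predict("sample.mp4", "fr", api_name="/predict")
#   print(result)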