"""Gradio app: transcribe a video's audio track with Whisper and translate
the transcription into a chosen target language with M2M100."""

import os
import tempfile

import gradio as gr
import moviepy.editor as mp
from transformers import pipeline

# Load both models once at import time; rebuilding a pipeline per request
# (as the original did for translation) reloads the full model each call.
asr = pipeline("automatic-speech-recognition", model="openai/whisper-large")
translator = pipeline("translation", model="facebook/m2m100_418M")


def generate_subtitles(video_path, target_language):
    """Transcribe the audio of *video_path* and translate the text.

    Parameters
    ----------
    video_path : str
        Path to a video file readable by moviepy/ffmpeg.
    target_language : str
        ISO 639-1 code accepted by M2M100 (e.g. "fa", "fr", "zh").

    Returns
    -------
    str
        Both the original transcription and its translation.
    """
    # Use a unique temp file so concurrent requests don't clobber each
    # other's audio, and so we can reliably clean it up afterwards.
    fd, audio_path = tempfile.mkstemp(suffix=".wav")
    os.close(fd)

    video = mp.VideoFileClip(video_path)
    try:
        video.audio.write_audiofile(audio_path, codec="pcm_s16le")
    finally:
        video.close()  # release the ffmpeg reader promptly

    try:
        # The ASR pipeline accepts a file path directly; an open binary
        # handle is not a supported input type.
        transcription = asr(audio_path)["text"]
    finally:
        os.remove(audio_path)  # always remove the temp WAV

    # Reuse the module-level translator; forced_bos_token_id steers M2M100
    # to decode in the requested target language.
    translated_subtitles = translator(
        transcription,
        forced_bos_token_id=translator.tokenizer.get_lang_id(target_language),
    )[0]["translation_text"]

    return f"Original: {transcription}\nTranslated: {translated_subtitles}"


def subtitle_video(video_file, target_language):
    """Gradio callback: resolve the uploaded video to a path and subtitle it."""
    # Recent gradio versions pass gr.Video as a filepath string; older ones
    # pass a tempfile-like object with a .name attribute. Support both.
    video_path = video_file if isinstance(video_file, str) else video_file.name
    return generate_subtitles(video_path, target_language)


# Gradio app layout
interface = gr.Interface(
    fn=subtitle_video,
    inputs=[
        gr.Video(label="Upload Video"),
        gr.Dropdown(  # Dropdown for language selection
            label="Choose Target Language",
            choices=[
                "fa",  # Persian
                "fr",  # French
                "es",  # Spanish
                "de",  # German
                "zh",  # Chinese
                "ar",  # Arabic
                "hi",  # Hindi
                "ru",  # Russian
            ],
            value="fa",  # Default to Persian
        ),
    ],
    outputs="text",
    title="Automatic Video Subtitler & Translator",
)

if __name__ == "__main__":
    interface.launch()