import gradio as gr
import moviepy.editor as mp
from transformers import pipeline

# Load Whisper model for speech-to-text (chunk long audio into 30 s windows)
asr = pipeline("automatic-speech-recognition", model="openai/whisper-large", chunk_length_s=30)

# M2M100 for multilingual translation
translator = pipeline("translation", model="facebook/m2m100_418M")

def generate_subtitles(video_path, target_language):
    # Extract the audio track from the video as a 16-bit PCM WAV file
    video = mp.VideoFileClip(video_path)
    video.audio.write_audiofile("temp_audio.wav", codec="pcm_s16le")
    video.close()

    # Convert speech to text (ASR using Whisper); the pipeline accepts a file path
    transcription = asr("temp_audio.wav")["text"]

    # Translate the transcription into the target language.
    # src_lang="en" assumes English source audio; adjust if the audio is in another language.
    translated_subtitles = translator(
        transcription,
        src_lang="en",
        tgt_lang=target_language,
    )[0]["translation_text"]

    # Return subtitles (plain text for now)
    subtitles = f"Original: {transcription}\nTranslated: {translated_subtitles}"
    return subtitles

# Wrapper for the Gradio interface
def subtitle_video(video_file, target_language):
    # Recent Gradio versions pass the upload as a file path string;
    # older versions pass a temp-file object with a .name attribute.
    video_path = video_file if isinstance(video_file, str) else video_file.name
    return generate_subtitles(video_path, target_language)

# Gradio app layout
interface = gr.Interface(
    fn=subtitle_video,
    inputs=[
        gr.Video(label="Upload Video"),
        gr.Textbox(label="Target Language Code (e.g., 'fr' for French, 'es' for Spanish)"),
    ],
    outputs="text",
    title="Automatic Video Subtitler & Translator",
)

interface.launch()
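
# Quick local test without launching the UI (a minimal sketch: "sample.mp4" and the
# "fr" target code are placeholders, assuming the file exists and ffmpeg is installed):
#
#     print(generate_subtitles("sample.mp4", "fr"))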