"""Gradio app that transcribes the speech in a video and translates it.

The audio track is extracted with moviepy, transcribed with Whisper, and the
transcript is translated with M2M100 into the language selected in the UI.
"""

import os
import tempfile

import gradio as gr
import moviepy.editor as mp
from transformers import pipeline

# Load Whisper model for speech-to-text. ``chunk_length_s`` makes the pipeline
# split long recordings into 30-second windows (Whisper's native input size)
# instead of only handling a single window.
asr = pipeline(
    "automatic-speech-recognition",
    model="openai/whisper-large",
    chunk_length_s=30,
)

# M2M100 for translation (multi-language). Loaded once here and reused for
# every request; re-creating the pipeline per call reloads the whole model.
translator = pipeline("translation", model="facebook/m2m100_418M")

# M2M100 needs an explicit source language. English is assumed for the
# transcript -- NOTE(review): change this if videos in other languages are
# expected.
SOURCE_LANGUAGE = "en"

# Supported languages with their codes
languages = {
    "Persian (fa)": "fa",
    "French (fr)": "fr",
    "Spanish (es)": "es",
    "German (de)": "de",
    "Chinese (zh)": "zh",
    "Arabic (ar)": "ar",
    "Hindi (hi)": "hi",
    "Russian (ru)": "ru",
}


def generate_subtitles(video_file, language_name):
    """Transcribe the speech in a video and translate the transcript.

    Args:
        video_file: Path to the video, or a file-like object exposing the
            path through its ``name`` attribute.
        language_name: A key of ``languages`` naming the target language.

    Returns:
        A string with the original transcript and its translation, or a
        string starting with ``"Error occurred:"`` when any step fails.
        Errors are reported as text because the result is shown directly in
        the Gradio text output.
    """
    audio_path = None
    try:
        if video_file is None:
            return "Error occurred: no video was provided"

        # Extract the target language code from the selected language name
        target_language = languages[language_name]

        # Accept both a file path string and a file object
        video_path = video_file if isinstance(video_file, str) else video_file.name
        print(f"Processing video from path: {video_path}")

        # Use a unique temporary file so concurrent requests do not overwrite
        # each other's audio.
        fd, audio_path = tempfile.mkstemp(suffix=".wav")
        os.close(fd)

        # Extract audio from video using moviepy; always release the clip's
        # reader resources, even when extraction fails.
        video = mp.VideoFileClip(video_path)
        try:
            if video.audio is None:
                return "Error occurred: the video has no audio track"
            video.audio.write_audiofile(audio_path, codec="pcm_s16le")
        finally:
            video.close()

        print("Starting speech-to-text transcription")
        # The ASR pipeline takes a file path (or bytes / array), not an open
        # file object.
        transcription = asr(audio_path)["text"]

        print("Starting translation")
        # The M2M100 translation pipeline selects languages via
        # src_lang / tgt_lang.
        translated_subtitles = translator(
            transcription,
            src_lang=SOURCE_LANGUAGE,
            tgt_lang=target_language,
        )[0]["translation_text"]

        return f"Original: {transcription}\nTranslated: {translated_subtitles}"
    except Exception as e:
        # UI boundary: log the error and show it to the user instead of crashing
        print(f"Error occurred: {e}")
        return f"Error occurred: {e}"
    finally:
        # Remove the temporary audio file regardless of the outcome
        if audio_path is not None and os.path.exists(audio_path):
            os.remove(audio_path)


def subtitle_video(video_file, language_name):
    """Gradio callback: produce subtitles for the uploaded video.

    Args:
        video_file: The uploaded video (file path or file-like object).
        language_name: The language name selected in the dropdown.

    Returns:
        The subtitle text, or an error message string.
    """
    try:
        # Handle both file-like objects and file paths
        return generate_subtitles(video_file, language_name)
    except Exception as e:
        print(f"Error in processing video: {e}")
        return f"Error in processing video: {e}"


# Gradio app layout
interface = gr.Interface(
    fn=subtitle_video,
    inputs=[
        gr.Video(label="Upload Video"),
        gr.Dropdown(  # Dropdown for language selection
            label="Choose Target Language",
            choices=list(languages.keys()),  # Display language names in the dropdown
            value="Persian (fa)",  # Default language
        ),
    ],
    outputs="text",
    title="Automatic Video Subtitler & Translator",
)

interface.launch()