# -*- coding: utf-8 -*-
"""
Whisper-based audio transcription with optional English translation,
served through a Gradio web interface.

Created on Mon Dec 9 16:43:31 2024
@author: Pradeep Kumar
"""
import os

import torch
import whisper
import gradio as gr
from deep_translator import GoogleTranslator

# Run Whisper on the GPU when one is available; fall back to CPU otherwise.
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"

# Timestamped transcripts are written under <cwd>/transcripts.
BASE_DIR = os.getcwd()
TRANSCRIPTS_FOLDER = os.path.join(BASE_DIR, 'transcripts')


def check_directory(path):
    """Create *path* (including parents) if it does not already exist."""
    os.makedirs(path, exist_ok=True)


check_directory(TRANSCRIPTS_FOLDER)


def transcribe_and_translate(audio_file, selected_language, model_type="base"):
    """
    Transcribe audio using Whisper and translate it into English if required.

    :param audio_file: Uploaded audio — either a file-like object with
        ``.name``/``.read()`` (Gradio ``type="file"``) or a plain path string
        (Gradio ``type="filepath"``)
    :param selected_language: Language code for transcription (e.g. "nl", "en")
    :param model_type: Whisper model type (default is 'base')
    :return: Timestamped English translation if one was produced, otherwise
        the plain transcription text; an error message string on failure
    """
    if not selected_language:
        # Validate before the expensive model load.
        return "Language selection is required."

    if isinstance(audio_file, str):
        # Already a path on disk (Gradio type="filepath"); nothing to copy,
        # and the file is owned by Gradio, so do not delete it afterwards.
        temp_audio_path = audio_file
        cleanup = False
    else:
        # BUG FIX: the original did os.path.join(BASE_DIR, audio_file.name).
        # Gradio file objects expose an *absolute* temp path in .name, and
        # os.path.join discards BASE_DIR when the second argument is absolute,
        # so open(..., "wb") truncated the very file about to be read (empty
        # audio) and the cleanup step deleted Gradio's own temp file.
        # Using the basename keeps the copy inside BASE_DIR.
        temp_audio_path = os.path.join(BASE_DIR, os.path.basename(audio_file.name))
        with open(temp_audio_path, "wb") as f:
            f.write(audio_file.read())
        cleanup = True

    try:
        # Load the Whisper model chosen by the user.
        model = whisper.load_model(model_type, device=DEVICE)
    except Exception as e:
        return f"Failed to load Whisper model ({model_type}): {e}"

    try:
        # Transcribe with the user-selected language.
        result = model.transcribe(temp_audio_path, language=selected_language,
                                  verbose=False)

        # Save the transcription (and any translation) with timestamps.
        base_name = os.path.basename(temp_audio_path)
        transcript_file = os.path.join(TRANSCRIPTS_FOLDER,
                                       f"{base_name}_transcript.txt")

        # Hoisted out of the loop: the translator is loop-invariant.
        translator = GoogleTranslator(source='auto', target='en')

        translated_text = []
        with open(transcript_file, 'w', encoding='utf-8') as text_file:
            for segment in result['segments']:
                start_time = segment['start']
                end_time = segment['end']
                text = segment['text']
                text_file.write(f"[{start_time:.2f} - {end_time:.2f}] {text}\n")

                # BUG FIX: the original tested
                #   selected_language in ['Dutch', 'English']
                # but the dropdown supplies language *codes* ("nl"/"en"),
                # so translation never ran. Translate whenever the source
                # language is not already English.
                if selected_language != 'en':
                    text_en = translator.translate(text)
                    translated_text.append(
                        f"[{start_time:.2f} - {end_time:.2f}] {text_en}")
                    text_file.write(
                        f"[{start_time:.2f} - {end_time:.2f}] {text_en}\n")

        # Return the translation when one was produced, else the raw text.
        return "\n".join(translated_text) if translated_text else result['text']
    except Exception as e:
        return f"Failed to process the audio file: {e}"
    finally:
        # Remove our temporary copy only — never a caller-owned path.
        if cleanup and os.path.exists(temp_audio_path):
            os.remove(temp_audio_path)


# Define the Gradio interface.
interface = gr.Interface(
    fn=transcribe_and_translate,
    inputs=[
        gr.Audio(source="upload", type="file", label="Upload Audio"),
        # BUG FIX: the original default value "mai" was not among the
        # offered choices, leaving the dropdown in an invalid state.
        gr.Dropdown(label="Select Language", choices=["nl", "en"], value="nl"),
        gr.Dropdown(label="Select Model Type",
                    choices=["tiny", "base", "small", "medium", "large"],
                    value="base"),
    ],
    outputs="text",
    title="Transcription and Translation",
)

if __name__ == '__main__':
    # Launch the Gradio interface.
    interface.launch()