import os

import gradio as gr
import torch
import whisper
from deep_translator import GoogleTranslator

# Use an NVIDIA GPU when available, otherwise fall back to the CPU.
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"

# Directory where transcripts are written.
BASE_DIR = os.getcwd()
TRANSCRIPTS_FOLDER = os.path.join(BASE_DIR, "transcripts")


def check_directory(path):
    """Ensure the given directory exists."""
    os.makedirs(path, exist_ok=True)


check_directory(TRANSCRIPTS_FOLDER)


def live_transcribe_and_translate(audio_path, selected_language, model_type="base"):
    """
    Transcribe an uploaded audio file with Whisper and translate it into
    English when required. Despite its name, this function processes a
    complete recording rather than a live stream; see the streaming sketch
    at the bottom of this file.

    :param audio_path: Path to the uploaded audio file
    :param selected_language: Language code of the audio
    :param model_type: Whisper model size (default is 'base')
    :return: Translated segments, or a completion message
    """
    try:
        # Load the Whisper model chosen by the user.
        model = whisper.load_model(model_type, device=DEVICE)
    except Exception as e:
        return f"Failed to load Whisper model ({model_type}): {e}"

    # Whisper transcribes the whole file and returns a dict whose 'segments'
    # list carries start/end timestamps alongside the recognised text.
    result = model.transcribe(audio_path, language=selected_language,
                              fp16=(DEVICE == "cuda"))

    translator = GoogleTranslator(source="auto", target="en")
    translated_text = []
    transcript_file = os.path.join(TRANSCRIPTS_FOLDER, "live_transcript.txt")

    with open(transcript_file, "w", encoding="utf-8") as text_file:
        for segment in result["segments"]:
            start_time = segment["start"]
            end_time = segment["end"]
            text = segment["text"]
            text_file.write(f"[{start_time:.2f} - {end_time:.2f}] {text}\n")

            # Translate Dutch audio into English.
            if selected_language == "nl":
                text_en = translator.translate(text)
                translated_text.append(f"[{start_time:.2f} - {end_time:.2f}] {text_en}")
                text_file.write(f"[{start_time:.2f} - {end_time:.2f}] {text_en}\n")

    return "\n".join(translated_text) if translated_text else "Transcription completed."


# Define the Gradio interface.
interface = gr.Interface(
    fn=live_transcribe_and_translate,
    inputs=[
        # 'filepath' hands Whisper a path it can decode directly via ffmpeg.
        gr.Audio(type="filepath", label="Upload Audio"),
        gr.Dropdown(label="Select Language", choices=["nl", "en"], value="en"),
        gr.Dropdown(label="Select Model Type",
                    choices=["tiny", "base", "small", "medium", "large"],
                    value="base"),
    ],
    outputs="text",
    title="Live Transcription and Translation",
)


if __name__ == "__main__":
    # Launch the Gradio interface.
    interface.launch()
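
# ---------------------------------------------------------------------------
# Optional sketch: genuinely live transcription from the microphone.
#
# The interface above transcribes a finished upload. For actual streaming,
# Gradio can deliver microphone chunks to a callback as (sample_rate, data)
# tuples when the Audio input is created with streaming=True. The buffering
# strategy below (re-transcribing the growing buffer on every chunk) and the
# use of gr.State to carry the buffer between calls are illustrative
# assumptions, not part of the original interface; `sources=["microphone"]`
# follows the Gradio 4.x API.
# ---------------------------------------------------------------------------
import numpy as np

# Loaded lazily so importing this module stays cheap; 'base' is an assumed
# default that keeps per-chunk latency tolerable on most machines.
_stream_model = None


def stream_transcribe(buffer, new_chunk):
    """Accumulate microphone audio and re-transcribe the whole buffer."""
    global _stream_model
    if _stream_model is None:
        _stream_model = whisper.load_model("base", device=DEVICE)

    sample_rate, data = new_chunk
    # Convert int16 PCM to the float32 range Whisper expects. Resampling is
    # omitted for brevity; real code should resample when sample_rate != 16000.
    data = data.astype(np.float32) / 32768.0
    if data.ndim > 1:  # downmix stereo to mono
        data = data.mean(axis=1)

    buffer = data if buffer is None else np.concatenate([buffer, data])
    result = _stream_model.transcribe(buffer, fp16=(DEVICE == "cuda"))
    return buffer, result["text"]


# Wiring it up (commented out so the upload interface above remains the entry
# point; uncomment and launch this instead to try streaming):
# streaming_demo = gr.Interface(
#     fn=stream_transcribe,
#     inputs=[gr.State(), gr.Audio(sources=["microphone"], streaming=True)],
#     outputs=[gr.State(), gr.Textbox(label="Live Transcript")],
#     live=True,
# )
# streaming_demo.launch()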