import os

import gradio as gr
import torch
import whisper
from deep_translator import GoogleTranslator

# Check if NVIDIA GPU is available
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"

# Directories for transcripts
BASE_DIR = os.getcwd()
TRANSCRIPTS_FOLDER = os.path.join(BASE_DIR, 'transcripts')


# Ensure transcripts directory exists
def check_directory(path):
    if not os.path.exists(path):
        os.makedirs(path)


check_directory(TRANSCRIPTS_FOLDER)


def live_transcribe_and_translate(audio_path, selected_language, model_type="base"):
    """
    Transcribe an audio recording using Whisper and translate it into English if required.

    :param audio_path: Path to the recorded or uploaded audio file
    :param selected_language: Language code of the spoken audio
    :param model_type: Whisper model size (default is 'base')
    :return: Transcription and translation
    """
    try:
        # Load the Whisper model based on user selection
        model = whisper.load_model(model_type, device=DEVICE)
    except Exception as e:
        return f"Failed to load Whisper model ({model_type}): {e}"

    # Transcribe the whole recording; Whisper returns timestamped segments
    result = model.transcribe(audio_path, language=selected_language)

    translated_text = []
    transcript_file = os.path.join(TRANSCRIPTS_FOLDER, 'live_transcript.txt')
    with open(transcript_file, 'w', encoding='utf-8') as text_file:
        for segment in result['segments']:
            start_time = segment['start']
            end_time = segment['end']
            text = segment['text']
            text_file.write(f"[{start_time:.2f} - {end_time:.2f}] {text}\n")
            if selected_language == 'nl':
                # Translate Dutch segments into English
                text_en = GoogleTranslator(source='auto', target='en').translate(text)
                translated_text.append(f"[{start_time:.2f} - {end_time:.2f}] {text_en}")
                text_file.write(f"[{start_time:.2f} - {end_time:.2f}] {text_en}\n")

    return "\n".join(translated_text) if translated_text else "Transcription completed."
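
# Quick check without the web UI (a minimal sketch; 'sample_nl.wav' is a
# hypothetical local file, not part of this repository):
#
#   print(live_transcribe_and_translate("sample_nl.wav", "nl", model_type="tiny"))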

# Define the Gradio interface
interface = gr.Interface(
    fn=live_transcribe_and_translate,
    inputs=[
        gr.Audio(type="filepath", label="Upload Audio"),  # file path suits pre-recorded or microphone audio
        gr.Dropdown(label="Select Language", choices=["nl", "en"], value="en"),
        gr.Dropdown(label="Select Model Type", choices=["tiny", "base", "small", "medium", "large"], value="base")
    ],
    outputs="text",
    title="Live Transcription and Translation"
)

if __name__ == '__main__':
    # Launch the Gradio interface
    interface.launch()