import torch
import os
import gradio as gr
from deep_translator import GoogleTranslator
import whisper
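# Dependencies (a minimal sketch of the assumed PyPI package names; note that
# the `whisper` import here comes from the openai-whisper package, not another
# "whisper" fork):
#   pip install torch gradio deep-translator openai-whisper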
# Check if NVIDIA GPU is available
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
# Directory for transcripts
BASE_DIR = os.getcwd()
TRANSCRIPTS_FOLDER = os.path.join(BASE_DIR, 'transcripts')

# Ensure the transcripts directory exists
def check_directory(path):
    if not os.path.exists(path):
        os.makedirs(path)

check_directory(TRANSCRIPTS_FOLDER)
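# (Design note: os.makedirs(path, exist_ok=True) would be an equivalent
# one-liner if you prefer to drop the helper.)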
def live_transcribe_and_translate(audio_path, selected_language, model_type="base"):
    """
    Transcribe a recorded audio clip using Whisper and translate it into
    English if required.

    :param audio_path: Path to the recorded audio file
    :param selected_language: Language code for transcription
    :param model_type: Whisper model type (default is 'base')
    :return: Timestamped translation, or a completion message
    """
    if audio_path is None:
        return "No audio provided."
    try:
        # Load the Whisper model based on user selection
        model = whisper.load_model(model_type, device=DEVICE)
    except Exception as e:
        return f"Failed to load Whisper model ({model_type}): {e}"

    # Whisper loads the file itself (via ffmpeg) and returns timestamped segments
    result = model.transcribe(audio_path, language=selected_language)

    translated_text = []
    transcript_file = os.path.join(TRANSCRIPTS_FOLDER, 'live_transcript.txt')
    with open(transcript_file, 'w', encoding='utf-8') as text_file:
        for segment in result['segments']:
            start_time = segment['start']
            end_time = segment['end']
            text = segment['text'].strip()
            text_file.write(f"[{start_time:.2f} - {end_time:.2f}] {text}\n")
            # Translate anything that is not already English
            if selected_language != 'en':
                text_en = GoogleTranslator(source='auto', target='en').translate(text)
                translated_text.append(f"[{start_time:.2f} - {end_time:.2f}] {text_en}")
                text_file.write(f"[{start_time:.2f} - {end_time:.2f}] {text_en}\n")
    return "\n".join(translated_text) if translated_text else "Transcription completed."
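# Quick check without the UI (hypothetical file name "sample.wav"):
#   print(live_transcribe_and_translate("sample.wav", "nl", "tiny"))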
# Define the Gradio interface
interface = gr.Interface(
    fn=live_transcribe_and_translate,
    inputs=[
        gr.Audio(type="filepath", label="Upload Audio"),  # Pass a file path so Whisper can load the audio directly
        gr.Dropdown(label="Select Language", choices=["nl", "en"], value="en"),
        gr.Dropdown(label="Select Model Type", choices=["tiny", "base", "small", "medium", "large"], value="base")
    ],
    outputs="text",
    title="Live Transcription and Translation"
)
if __name__ == '__main__':
    # Launch the Gradio interface
    interface.launch()
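# To run locally: `python app.py`, then open the URL Gradio prints
# (by default http://127.0.0.1:7860).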