import torch
import os
import gradio as gr
from deep_translator import GoogleTranslator
import whisper
# Check if NVIDIA GPU is available
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
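# Note: Whisper runs in fp16 only on CUDA; on CPU it falls back to fp32 automatically.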
# Directories for transcripts
BASE_DIR = os.getcwd()
TRANSCRIPTS_FOLDER = os.path.join(BASE_DIR, 'transcripts')
# Ensure transcripts directory exists
def check_directory(path):
    if not os.path.exists(path):
        os.makedirs(path)

check_directory(TRANSCRIPTS_FOLDER)
def live_transcribe_and_translate(audio_path, selected_language, model_type="base"):
    """
    Transcribe an audio file using Whisper and translate it into English if required.

    :param audio_path: Path to the uploaded or recorded audio file
    :param selected_language: Language code for transcription
    :param model_type: Whisper model type (default is 'base')
    :return: Transcription and translation
    """
    try:
        # Load the Whisper model based on user selection
        model = whisper.load_model(model_type, device=DEVICE)
    except Exception as e:
        return f"Failed to load Whisper model ({model_type}): {e}"

    # Whisper's transcribe() accepts a file path and returns timestamped segments
    result = model.transcribe(audio_path, language=selected_language, fp16=(DEVICE == "cuda"))

    translated_text = []
    transcript_file = os.path.join(TRANSCRIPTS_FOLDER, 'live_transcript.txt')
    with open(transcript_file, 'w', encoding='utf-8') as text_file:
        for segment in result['segments']:
            start_time = segment['start']
            end_time = segment['end']
            text = segment['text'].strip()
            text_file.write(f"[{start_time:.2f} - {end_time:.2f}] {text}\n")
            if selected_language == 'nl':
                # Translate Dutch segments into English
                text_en = GoogleTranslator(source='auto', target='en').translate(text)
                translated_text.append(f"[{start_time:.2f} - {end_time:.2f}] {text_en}")
                text_file.write(f"[{start_time:.2f} - {end_time:.2f}] {text_en}\n")
    return "\n".join(translated_text) if translated_text else "Transcription completed."
# Define the Gradio interface
interface = gr.Interface(
    fn=live_transcribe_and_translate,
    inputs=[
        gr.Audio(type="filepath", label="Upload Audio"),  # File path is passed straight to Whisper
        gr.Dropdown(label="Select Language", choices=["nl", "en"], value="en"),
        gr.Dropdown(label="Select Model Type", choices=["tiny", "base", "small", "medium", "large"], value="base")
    ],
    outputs="text",
    title="Live Transcription and Translation"
)
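# For genuinely live input, recent Gradio versions also support
# gr.Audio(sources=["microphone"], streaming=True) paired with a generator function;
# the filepath input above is a simpler sketch that covers uploaded or recorded clips.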
if __name__ == '__main__':
    # Launch the Gradio interface
    interface.launch()
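# Usage sketch (assuming this file is the Space's app.py): run it locally with
# `python app.py` and open the URL Gradio prints (http://127.0.0.1:7860 by default),
# then upload an audio clip and pick a language and model size.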