# NOTE: removed a non-code artifact ("Spaces: Sleeping" — a Hugging Face
# Spaces page-status banner captured during extraction); it was not Python.
# -*- coding: utf-8 -*-
"""
Created on Mon Dec 9 16:43:31 2024

@author: Pradeep Kumar
"""
import os

import gradio as gr
import torch
import whisper
from deep_translator import GoogleTranslator
# Run Whisper on the GPU when CUDA is available, otherwise on the CPU.
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"

# Transcripts are stored in ./transcripts under the current working directory.
BASE_DIR = os.getcwd()
TRANSCRIPTS_FOLDER = os.path.join(BASE_DIR, "transcripts")


def check_directory(path):
    """Create *path* if it does not already exist; no-op otherwise."""
    if os.path.exists(path):
        return
    os.makedirs(path)


# Make sure the transcripts directory exists before any file is written.
check_directory(TRANSCRIPTS_FOLDER)
def transcribe_and_translate(audio_file, selected_language, model_type="base"):
    """
    Transcribe audio using Whisper and translate it into English if required.

    :param audio_file: The uploaded audio — either a filepath string
        (Gradio ``type="filepath"``) or a file-like object exposing
        ``.name`` and ``.read()`` (legacy Gradio ``type="file"``).
    :param selected_language: Language code for transcription (e.g. ``"nl"``).
        Required; an error message is returned when it is falsy.
    :param model_type: Whisper model type (default is 'base').
    :return: The timestamped English translation when one was produced,
        otherwise the raw transcription text; on failure, an error message
        string (errors are reported via the return value, not raised).
    """
    if isinstance(audio_file, str):
        # Already a path on disk — use it in place; nothing to clean up.
        temp_audio_path = audio_file
        source_name = os.path.basename(audio_file)
        created_copy = False
    else:
        # BUGFIX: os.path.join discards BASE_DIR when the second argument is
        # an absolute path (which Gradio upload names are), so join only the
        # basename. Also keep the basename for the transcript filename below.
        source_name = os.path.basename(audio_file.name)
        temp_audio_path = os.path.join(BASE_DIR, source_name)
        # Save the uploaded file to a temporary location.
        with open(temp_audio_path, "wb") as f:
            f.write(audio_file.read())
        created_copy = True
    try:
        # Load the Whisper model based on user selection.
        model = whisper.load_model(model_type, device=DEVICE)
    except Exception as e:
        return f"Failed to load Whisper model ({model_type}): {e}"
    try:
        if not selected_language:
            return "Language selection is required."
        # Transcribe with the user-selected language.
        result = model.transcribe(temp_audio_path, language=selected_language, verbose=False)
        # Save the transcription with timestamps.
        transcript_file = os.path.join(TRANSCRIPTS_FOLDER, f"{source_name}_transcript.txt")
        translated_text = []
        with open(transcript_file, 'w', encoding='utf-8') as text_file:
            for segment in result['segments']:
                start_time = segment['start']
                end_time = segment['end']
                text = segment['text']
                text_file.write(f"[{start_time:.2f} - {end_time:.2f}] {text}\n")
                # BUGFIX: the language dropdown supplies ISO codes ('nl',
                # 'en'), not full names, so the original comparison against
                # ['Dutch', 'English'] never matched and translation never
                # ran. Accept the codes (full names kept for compatibility).
                if selected_language in ('nl', 'en', 'Dutch', 'English'):
                    text_en = GoogleTranslator(source='auto', target='en').translate(text)
                    translated_text.append(f"[{start_time:.2f} - {end_time:.2f}] {text_en}")
                    text_file.write(f"[{start_time:.2f} - {end_time:.2f}] {text_en}\n")
        # Return the translation when one was built, else the raw transcript.
        return "\n".join(translated_text) if translated_text else result['text']
    except Exception as e:
        return f"Failed to process the audio file: {e}"
    finally:
        # Clean up only the copy we created; never delete a caller-owned file.
        if created_copy and os.path.exists(temp_audio_path):
            os.remove(temp_audio_path)
# Define the Gradio interface: audio upload, language choice, and model size.
interface = gr.Interface(
    fn=transcribe_and_translate,
    inputs=[
        # NOTE(review): `source=` and `type="file"` are Gradio 3.x-era
        # arguments; Gradio 4+ uses `sources=[...]` and `type="filepath"` —
        # confirm the installed Gradio version before upgrading these.
        gr.Audio(source="upload", type="file", label="Upload Audio"),
        # BUGFIX: the default was "mai", which is not among the choices, so
        # the dropdown started on an invalid value. Default to "nl" instead.
        gr.Dropdown(label="Select Language", choices=["nl", "en"], value="nl"),
        gr.Dropdown(label="Select Model Type", choices=["tiny", "base", "small", "medium", "large"], value="base"),
    ],
    outputs="text",
    title="Transcription and Translation",
)
if __name__ == "__main__":
    # Start the Gradio app only when this file is executed as a script,
    # not when it is imported as a module.
    interface.launch()