# BihariVernacular / BihariVernacular.py
# (Hugging Face Space page header, commit 4fe3864 — converted to comments
# so the module parses as valid Python.)
# -*- coding: utf-8 -*-
"""
Created on Mon Dec 9 16:43:31 2024
@author: Pradeep Kumar
"""
import whisper
import torch
import os
import gradio as gr
from deep_translator import GoogleTranslator
# Select GPU when available; Whisper inference is far faster on CUDA.
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"

# Directory where per-file transcripts are written.
BASE_DIR = os.getcwd()
TRANSCRIPTS_FOLDER = os.path.join(BASE_DIR, 'transcripts')


def check_directory(path):
    """Create *path* (and any missing parents) if it does not already exist."""
    # exist_ok=True avoids the exists()/makedirs() race of the original.
    os.makedirs(path, exist_ok=True)


check_directory(TRANSCRIPTS_FOLDER)
def transcribe_and_translate(audio_file, selected_language, model_type="base"):
    """
    Transcribe an uploaded audio file with Whisper and translate it to English.

    :param audio_file: Uploaded audio — either a filesystem path (str) or a
        file-like object exposing ``.name`` and ``.read()`` (Gradio upload).
    :param selected_language: Whisper language code (e.g. 'mai', 'nl', 'en').
    :param model_type: Whisper model size to load (default 'base').
    :return: The timestamped English translation when one was produced,
        otherwise the raw transcription text; an error message on failure.
    """
    # Guard clause first: no point saving the upload if we must bail anyway.
    if not selected_language:
        return "Language selection is required."

    # Normalize the input: Gradio may pass a path string or a file object.
    if isinstance(audio_file, str):
        temp_audio_path = audio_file
        source_name = os.path.basename(audio_file)
        created_temp = False  # caller-owned file: never delete it
    else:
        source_name = os.path.basename(audio_file.name)
        temp_audio_path = os.path.join(BASE_DIR, source_name)
        with open(temp_audio_path, "wb") as f:
            f.write(audio_file.read())
        created_temp = True

    try:
        try:
            # Load the Whisper model based on user selection.
            model = whisper.load_model(model_type, device=DEVICE)
        except Exception as e:
            # BUG FIX: the original returned here *before* its try/finally,
            # leaking the temp audio file; the outer finally now cleans up.
            return f"Failed to load Whisper model ({model_type}): {e}"

        try:
            result = model.transcribe(temp_audio_path, language=selected_language, verbose=False)

            # Write the timestamped transcript (and translation) to disk.
            transcript_file = os.path.join(TRANSCRIPTS_FOLDER, f"{source_name}_transcript.txt")
            translated_text = []
            with open(transcript_file, 'w', encoding='utf-8') as text_file:
                for segment in result['segments']:
                    start_time = segment['start']
                    end_time = segment['end']
                    text = segment['text']
                    text_file.write(f"[{start_time:.2f} - {end_time:.2f}] {text}\n")
                    # BUG FIX: the original compared language *codes* ('nl',
                    # 'en') against full names ('Dutch', 'English'), so the
                    # translation branch never ran. Translate every
                    # non-English transcript to English.
                    if selected_language != 'en':
                        text_en = GoogleTranslator(source='auto', target='en').translate(text)
                        translated_text.append(f"[{start_time:.2f} - {end_time:.2f}] {text_en}")
                        text_file.write(f"[{start_time:.2f} - {end_time:.2f}] {text_en}\n")
            # Prefer the translation when one was produced.
            return "\n".join(translated_text) if translated_text else result['text']
        except Exception as e:
            return f"Failed to process the audio file: {e}"
    finally:
        # Clean up only files we created ourselves, on every exit path.
        if created_temp and os.path.exists(temp_audio_path):
            os.remove(temp_audio_path)
# Define the Gradio interface.
interface = gr.Interface(
    fn=transcribe_and_translate,
    inputs=[
        gr.Audio(source="upload", type="file", label="Upload Audio"),
        # BUG FIX: the default value "mai" (Maithili) was missing from the
        # choices list, which only offered "nl" and "en".
        gr.Dropdown(label="Select Language", choices=["mai", "nl", "en"], value="mai"),
        gr.Dropdown(label="Select Model Type", choices=["tiny", "base", "small", "medium", "large"], value="base")
    ],
    outputs="text",
    title="Transcription and Translation"
)

if __name__ == '__main__':
    # Launch the Gradio web UI (blocking call).
    interface.launch()