# NOTE: removed a non-code artifact ("Spaces: Sleeping" — a Hugging Face
# Spaces page-status banner captured during extraction); it was not Python.
# -*- coding: utf-8 -*-
"""
Created on Mon Dec 9 16:43:31 2024

@author: Pradeep Kumar
"""
import os

import gradio as gr
import torch
import whisper
from deep_translator import GoogleTranslator
# Run Whisper on the GPU when CUDA is available, otherwise on the CPU.
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"

# Transcripts are stored in ./transcripts under the current working directory.
BASE_DIR = os.getcwd()
TRANSCRIPTS_FOLDER = os.path.join(BASE_DIR, "transcripts")


def check_directory(path):
    """Create *path* if it does not already exist; no-op otherwise."""
    if os.path.exists(path):
        return
    os.makedirs(path)


# Make sure the transcripts directory exists before any file is written.
check_directory(TRANSCRIPTS_FOLDER)
def transcribe_and_translate(audio_file, selected_language, model_type="base"):
    """
    Transcribe audio using Whisper and translate it into English if required.

    :param audio_file: The uploaded audio — either a filepath string
        (Gradio ``type="filepath"``) or a file-like object exposing
        ``.name`` and ``.read()`` (legacy Gradio ``type="file"``).
    :param selected_language: Language code for transcription (e.g. ``"nl"``).
        Required; an error message is returned when it is falsy.
    :param model_type: Whisper model type (default is 'base').
    :return: The timestamped English translation when one was produced,
        otherwise the raw transcription text; on failure, an error message
        string (errors are reported via the return value, not raised).
    """
    if isinstance(audio_file, str):
        # Already a path on disk — use it in place; nothing to clean up.
        temp_audio_path = audio_file
        source_name = os.path.basename(audio_file)
        created_copy = False
    else:
        # BUGFIX: os.path.join discards BASE_DIR when the second argument is
        # an absolute path (which Gradio upload names are), so join only the
        # basename. Also keep the basename for the transcript filename below.
        source_name = os.path.basename(audio_file.name)
        temp_audio_path = os.path.join(BASE_DIR, source_name)
        # Save the uploaded file to a temporary location.
        with open(temp_audio_path, "wb") as f:
            f.write(audio_file.read())
        created_copy = True
    try:
        # Load the Whisper model based on user selection.
        model = whisper.load_model(model_type, device=DEVICE)
    except Exception as e:
        return f"Failed to load Whisper model ({model_type}): {e}"
    try:
        if not selected_language:
            return "Language selection is required."
        # Transcribe with the user-selected language.
        result = model.transcribe(temp_audio_path, language=selected_language, verbose=False)
        # Save the transcription with timestamps.
        transcript_file = os.path.join(TRANSCRIPTS_FOLDER, f"{source_name}_transcript.txt")
        translated_text = []
        with open(transcript_file, 'w', encoding='utf-8') as text_file:
            for segment in result['segments']:
                start_time = segment['start']
                end_time = segment['end']
                text = segment['text']
                text_file.write(f"[{start_time:.2f} - {end_time:.2f}] {text}\n")
                # BUGFIX: the language dropdown supplies ISO codes ('nl',
                # 'en'), not full names, so the original comparison against
                # ['Dutch', 'English'] never matched and translation never
                # ran. Accept the codes (full names kept for compatibility).
                if selected_language in ('nl', 'en', 'Dutch', 'English'):
                    text_en = GoogleTranslator(source='auto', target='en').translate(text)
                    translated_text.append(f"[{start_time:.2f} - {end_time:.2f}] {text_en}")
                    text_file.write(f"[{start_time:.2f} - {end_time:.2f}] {text_en}\n")
        # Return the translation when one was built, else the raw transcript.
        return "\n".join(translated_text) if translated_text else result['text']
    except Exception as e:
        return f"Failed to process the audio file: {e}"
    finally:
        # Clean up only the copy we created; never delete a caller-owned file.
        if created_copy and os.path.exists(temp_audio_path):
            os.remove(temp_audio_path)
# Define the Gradio interface: audio upload, language choice, and model size.
interface = gr.Interface(
    fn=transcribe_and_translate,
    inputs=[
        # NOTE(review): `source=` and `type="file"` are Gradio 3.x-era
        # arguments; Gradio 4+ uses `sources=[...]` and `type="filepath"` —
        # confirm the installed Gradio version before upgrading these.
        gr.Audio(source="upload", type="file", label="Upload Audio"),
        # BUGFIX: the default was "mai", which is not among the choices, so
        # the dropdown started on an invalid value. Default to "nl" instead.
        gr.Dropdown(label="Select Language", choices=["nl", "en"], value="nl"),
        gr.Dropdown(label="Select Model Type", choices=["tiny", "base", "small", "medium", "large"], value="base"),
    ],
    outputs="text",
    title="Transcription and Translation",
)
if __name__ == "__main__":
    # Start the Gradio app only when this file is executed as a script,
    # not when it is imported as a module.
    interface.launch()