Spaces:

rgcodeai
/

kit-whisperx-web-ui

Runtime error

App Files Files Community

kit-whisperx-web-ui / app.py

rgcodeai

Upload 9 files

b8f59b3 verified about 1 year ago

raw

history blame

4.54 kB

	import gradio as gr
	import torch
	import time
	import os
	from src.transcription_utils import transcribe, language_options, model_options, ModelManager

	class TranscriptionApp:
	def __init__(self):
	"""
	Initializes an instance with a ModelManager for managing AI models,
	sets default device and model based on CUDA availability,
	and prepares a Gradio app and outputs dictionary for UI interactions and storing results.
	"""
	self.model_manager = ModelManager()
	self.default_device = "cuda" if torch.cuda.is_available() else "cpu"
	self.default_model = "Large-v2" if torch.cuda.is_available() else "Medium"
	self.app = gr.Blocks()
	self.outputs = {}
	self.last_transcription_time = 0

	# Crear carpeta Temp si no existe
	if not os.path.exists('Temp'):
	os.makedirs('Temp')

	def start_transcription(self, file, device, language, model):
	"""Start transcription process."""
	start_time = time.time()

	try:
	results = transcribe(file, device, language, model, self.model_manager)
	except ValueError as e:
	return str(e), 0

	end_time = time.time()
	self.last_transcription_time = round(end_time - start_time, 1)

	if results:
	json_output, txt_path, vtt_path, srt_path = results
	self.outputs = {
	'TXT': txt_path,
	'SRT': srt_path,
	'JSON': json_output,
	'VTT': vtt_path
	}
	return self.update_output_text('TXT'), self.last_transcription_time
	return "No transcription available.", self.last_transcription_time


	def update_output_text(self, format_choice):
	"""Update the text area based on the format choice."""
	if format_choice and self.outputs.get(format_choice):
	file_path = self.outputs[format_choice]
	try:
	with open(file_path, 'r', encoding='utf-8') as file:
	return file.read()
	except FileNotFoundError:
	return "File not found."
	return "No file available or format not selected."

	# User interface for the transcription kit using Gradio
	def setup_ui(self):
	with self.app:
	gr.Markdown("# Kit Transcriptor Whisperx")
	gr.Markdown("❤️ Follow us on [YouTube](https://www.youtube.com/channel/UC_YzjCh-CSSCSGANvt5wBNQ?sub_confirmation=1), [GitHub](https://github.com/rgcodeai) 🌐 More on [Mister Contenidos](https://mistercontenidos.com)")
	with gr.Row():
	with gr.Column():
	gr.Markdown("### Supported Formats: Audio (mp3, wav) and Video (mp4, avi, mov, flv)")
	file_input = gr.File(label="Upload your multimedia file", type="filepath")
	device_dropdown = gr.Dropdown(label="Select device", choices=["cuda", "cpu"], value=self.default_device)
	model_dropdown = gr.Dropdown(label="Select model", choices=list(model_options.keys()), value=self.default_model)
	language_dropdown = gr.Dropdown(label="Select language", choices=list(language_options.keys()), value="Identify")
	transcribe_button = gr.Button("Start Transcription")

	with gr.Column():
	transcription_time_display = gr.Textbox(label="Last Transcription Time (seconds)", interactive=False, lines=1)
	format_choice = gr.Radio(['TXT', 'SRT', 'VTT', 'JSON'], label="Select format to view:", value='TXT')
	output_text = gr.Textbox(label="File Content", interactive=False, lines=10)
	download_button = gr.Button("Download Transcription")
	format_choice.change(fn=self.update_output_text, inputs=format_choice, outputs=output_text, queue=True)
	download_button.click(fn=lambda x: self.outputs.get(x), inputs=format_choice, outputs=gr.File())

	transcribe_button.click(fn=self.start_transcription, inputs=[file_input, device_dropdown, language_dropdown, model_dropdown], outputs=[output_text, transcription_time_display])

	def launch(self):
	"""Launch the transcription application."""
	self.setup_ui()
	self.app.launch()


	if __name__ == '__main__':
	app = TranscriptionApp()
	app.launch()