Spaces:

Lenylvt
/

Whisper-API

Sleeping

Whisper-API / app.py

Update app.py

64736ec verified about 1 year ago

1.65 kB

	import gradio as gr
	from faster_whisper import WhisperModel
	import logging

	# Install the default root logging handler, then raise the
	# "faster_whisper" logger to DEBUG so its messages are emitted.
	logging.basicConfig()
	logging.getLogger("faster_whisper").setLevel(logging.DEBUG)

	# Settings passed to WhisperModel below.
	model_size = "large-v3" # Model size/name handed to WhisperModel
	device = "cpu" # "cuda" for GPU, "cpu" for CPU
	compute_type = "int8" # GPU: "float16" or "int8"; CPU: "int8"

	# Build the model once at import time; transcribe() reuses this instance.
	model = WhisperModel(model_size, device=device, compute_type=compute_type)

	def format_timestamp(seconds):
	    """Convert a duration in seconds to an ``HH:MM:SS.mmm`` string.

	    The value is rounded to whole milliseconds *before* being split into
	    fields, so rounding carries into the larger units: 59.9996 becomes
	    ``00:01:00.000`` rather than the invalid ``00:00:60.000``.

	    Args:
	        seconds: Duration in seconds (int or float); expected to be
	            non-negative.

	    Returns:
	        The zero-padded timestamp. Hours are not wrapped, so durations of
	        100 hours or more simply use more than two hour digits.
	    """
	    # Work in integer milliseconds to avoid a seconds field of "60.000".
	    total_ms = int(round(seconds * 1000))
	    total_seconds, millis = divmod(total_ms, 1000)
	    total_minutes, secs = divmod(total_seconds, 60)
	    hours, minutes = divmod(total_minutes, 60)
	    return f"{hours:02d}:{minutes:02d}:{secs:02d}.{millis:03d}"

	def transcribe(audio_file):
	    """Transcribe an audio file into timestamped lines of text.

	    Args:
	        audio_file: Path to the audio file, as supplied by the Gradio
	            ``Audio`` input (``type="filepath"``). Gradio passes ``None``
	            when the form is submitted without a file.

	    Returns:
	        One line per segment, formatted as
	        ``[HH:MM:SS.mmm -> HH:MM:SS.mmm] text`` and joined with newlines.
	        An empty string if no audio file was provided.
	    """
	    # Nothing uploaded: return an empty transcript instead of letting the
	    # model fail on a missing path.
	    if not audio_file:
	        return ""

	    # The second return value (transcription info) is not needed here.
	    segments, _ = model.transcribe(audio_file)

	    # faster-whisper yields segments lazily, so decoding happens while
	    # the join below consumes them.
	    return "\n".join(
	        f"[{format_timestamp(segment.start)} -> {format_timestamp(segment.end)}] {segment.text}"
	        for segment in segments
	    )

	# Build the web UI: a single audio upload in, plain text out.
	iface = gr.Interface(
	    title="Whisper Transcription with Enhanced Timestamps",
	    description=(
	        "Upload an audio file to get transcription with enhanced timestamps "
	        "in HH:MM:SS.mmm format using Faster Whisper."
	    ),
	    fn=transcribe,
	    inputs=gr.Audio(label="Upload Audio", type="filepath", sources="upload"),
	    outputs="text",
	)

	# Start the server only when this file is executed as a script.
	if __name__ == "__main__":
	    iface.launch()