# Page residue captured with this file: "Spaces: Sleeping"
import gradio as gr | |
from faster_whisper import WhisperModel | |
import logging | |
# Configure logging so faster-whisper's debug messages are emitted.
logging.basicConfig()
logging.getLogger("faster_whisper").setLevel(logging.DEBUG)

# Whisper model configuration.
model_size = "large-v3"  # Model size name to load
device = "cpu"  # or "cuda" if GPU is available
# float16 is only usable on GPU; the CTranslate2 CPU backend rejects it with
# "do not support efficient float16 computation", so fall back to int8 on CPU.
compute_type = "float16" if device == "cuda" else "int8"

# WhisperModel's first parameter is named `model_size_or_path`, so the size is
# passed positionally; `model_size=` as a keyword raises TypeError.
model = WhisperModel(model_size, device=device, compute_type=compute_type)
def transcribe(audio_file):
    """Transcribe an audio file into text annotated with timestamps.

    Args:
        audio_file: Path of the audio file to transcribe. May be ``None`` or
            empty when the caller has no file to offer.

    Returns:
        A newline-separated string. Each segment contributes one
        ``[start - end] text`` line followed by one line per word with that
        word's own ``[start - end]`` range. If ``audio_file`` is missing, a
        short explanatory message is returned instead of calling the model.
    """
    # Guard against a missing upload rather than passing None to the model.
    if not audio_file:
        return "No audio file provided."

    # Enable word-level timestamps
    segments, _ = model.transcribe(audio_file, word_timestamps=True)

    # Collect every output line in one flat list so a segment without words
    # does not leave a stray blank line in the result.
    lines = []
    for segment in segments:
        lines.append(f"[{segment.start:.2f}s - {segment.end:.2f}s] {segment.text}")
        # `words` can be None when no word-level data is attached.
        lines.extend(
            f" [{word.start:.2f}s - {word.end:.2f}s] {word.word}"
            for word in (segment.words or ())
        )
    return "\n".join(lines)
# Gradio UI: one audio upload (handed to `transcribe` as a file path) mapped
# to a plain-text output.
iface = gr.Interface(
    fn=transcribe,
    inputs=gr.Audio(
        sources="upload",
        type="filepath",
        label="Upload Audio",
    ),
    outputs="text",
    title="Enhanced Whisper Transcription with Timestamps",
    description=(
        "Upload an audio file to get detailed transcription with timestamps "
        "using Faster Whisper."
    ),
)

# Start the app only when this file is executed as a script.
if __name__ == "__main__":
    iface.launch()