Spaces:
Sleeping
Sleeping
File size: 1,649 Bytes
c60e096 7752cd2 c60e096 7752cd2 c60e096 7752cd2 c60e096 7752cd2 c60e096 7752cd2 c60e096 6cb9375 c60e096 7752cd2 c60e096 7752cd2 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 |
import gradio as gr
from faster_whisper import WhisperModel
import logging
# Surface faster_whisper's debug messages through the root handler that
# basicConfig() installs.
logging.basicConfig()
logging.getLogger("faster_whisper").setLevel(logging.DEBUG)

# Model configuration.
model_size = "large-v3"  # Whisper checkpoint to load
device = "cpu"  # or "cuda" if GPU is available
# float16 is rejected by the CPU backend ("do not support efficient float16
# computation"), so fall back to int8 quantization unless running on CUDA.
compute_type = "float16" if device == "cuda" else "int8"

# WhisperModel's first parameter is named `model_size_or_path`; passing it as
# `model_size=` raises TypeError, so it is given positionally.
model = WhisperModel(model_size, device=device, compute_type=compute_type)
def transcribe(audio_file):
    """Transcribe an audio file and format the result with timestamps.

    Args:
        audio_file: Path to the audio file to transcribe. May be ``None`` or
            empty when the form is submitted without an upload.

    Returns:
        A newline-separated string. Each segment contributes a header line
        ``[start - end] text`` followed by one line per word with that word's
        own start/end times. If no audio was supplied, a short notice is
        returned instead of a transcription.
    """
    # Without this guard an empty submission is passed straight to the model
    # and fails inside the decoder instead of giving the user a clear message.
    if not audio_file:
        return "No audio file provided."

    # Enable word-level timestamps; the second return value (info) is unused.
    segments, _ = model.transcribe(audio_file, word_timestamps=True)

    transcription_with_timestamps = []
    for segment in segments:
        segment_text = f"[{segment.start:.2f}s - {segment.end:.2f}s] {segment.text}\n"
        # `or []` keeps the join safe should a segment carry no word list.
        word_details = "\n".join(
            f" [{word.start:.2f}s - {word.end:.2f}s] {word.word}"
            for word in (segment.words or [])
        )
        transcription_with_timestamps.append(segment_text + word_details)
    return "\n".join(transcription_with_timestamps)
# Build the web UI: one upload-only audio input (delivered to the callback as
# a file path) wired to transcribe(), with the result rendered as plain text.
iface = gr.Interface(
    fn=transcribe,
    inputs=gr.Audio(sources="upload", type="filepath", label="Upload Audio"),
    outputs="text",
    title="Enhanced Whisper Transcription with Timestamps",
    description=(
        "Upload an audio file to get detailed transcription with timestamps "
        "using Faster Whisper."
    ),
)

# Start the app only when executed as a script, not when imported.
if __name__ == "__main__":
    iface.launch()
|