Spaces:
Sleeping
Sleeping
File size: 1,647 Bytes
c60e096 7752cd2 c60e096 7752cd2 c60e096 7752cd2 dc2f23e af1960a 7752cd2 af1960a c60e096 dc2f23e 7752cd2 dc2f23e 3a81031 7752cd2 dc2f23e 3a81031 dc2f23e 3a81031 7752cd2 c60e096 7752cd2 c60e096 dc2f23e c60e096 dc2f23e c60e096 7752cd2 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 |
import gradio as gr
from faster_whisper import WhisperModel
import logging
# Route log records to the root handler and turn on verbose output for the
# faster_whisper logger so transcription progress is visible while debugging.
logging.basicConfig()
logging.getLogger("faster_whisper").setLevel(logging.DEBUG)

# Model configuration.
model_size = "large-v3"  # Whisper checkpoint to load
device = "cpu"  # "cuda" for GPU, "cpu" for CPU
compute_type = "int8"  # GPU: "float16" or "int8"; CPU: "int8"

# Build the shared model instance once at import time; transcribe() reuses it.
model = WhisperModel(
    model_size_or_path=model_size,
    device=device,
    compute_type=compute_type,
)
def format_timestamp(seconds):
    """Convert a duration in seconds to ``HH:MM:SS.mmm`` format.

    The value is rounded to whole milliseconds *before* it is split into
    fields, so a rounding carry propagates into the minutes and hours
    (59.9996 s becomes ``00:01:00.000`` rather than ``00:00:60.000``).

    Args:
        seconds: Duration in seconds (int or float), expected to be
            non-negative.

    Returns:
        The formatted timestamp string. Hours are zero-padded to two digits
        and widen as needed for longer durations.
    """
    # Work in integer milliseconds to avoid float formatting rounding a
    # seconds field such as 59.9996 up to "60.000".
    total_ms = int(round(seconds * 1000))
    total_seconds, millis = divmod(total_ms, 1000)
    total_minutes, secs = divmod(total_seconds, 60)
    hours, minutes = divmod(total_minutes, 60)
    return f"{hours:02d}:{minutes:02d}:{secs:02d}.{millis:03d}"
def transcribe(audio_file):
    """Transcribe an audio input and return one timestamped line per segment.

    Args:
        audio_file: The audio input supplied by the Gradio interface. It is
            forwarded unchanged to ``model.transcribe``. ``None`` (nothing
            uploaded) is accepted.

    Returns:
        A newline-joined string in which every line has the form
        ``[HH:MM:SS.mmm -> HH:MM:SS.mmm] text``, or an empty string when no
        audio was provided.
    """
    # Gradio passes None when the form is submitted without an upload;
    # return early instead of letting the model fail on a missing input.
    if audio_file is None:
        return ""

    # The second element of the result (transcription info) is not used here.
    segments, _ = model.transcribe(audio_file)

    # Render each segment with its start/end times in HH:MM:SS.mmm format.
    transcription_with_timestamps = [
        f"[{format_timestamp(segment.start)} -> {format_timestamp(segment.end)}] {segment.text}"
        for segment in segments
    ]
    return "\n".join(transcription_with_timestamps)
# Define the Gradio interface.
# NOTE: the legacy ``gr.inputs`` namespace was deprecated in Gradio 3 and
# removed in Gradio 4, so the top-level ``gr.Audio`` component is used instead.
# ``type="filepath"`` hands the callback a path string. The source selector is
# left at the component default because its keyword differs between major
# versions (``source`` in 3.x, ``sources`` in 4.x).
iface = gr.Interface(
    fn=transcribe,
    inputs=gr.Audio(type="filepath", label="Upload Audio"),
    outputs="text",
    title="Whisper Transcription with Enhanced Timestamps",
    description="Upload an audio file to get transcription with enhanced timestamps in HH:MM:SS.mmm format using Faster Whisper.",
)

# Launch the app only when this file is executed as a script.
if __name__ == "__main__":
    iface.launch()
|