Whisper-API / app.py
Lenylvt's picture
Update app.py
64736ec verified
raw
history blame
1.65 kB
import gradio as gr
from faster_whisper import WhisperModel
import logging
# Model configuration. Adjust these three values to match the target hardware.
model_size = "large-v3"  # Which Whisper checkpoint to load
device = "cpu"  # "cuda" for GPU, "cpu" for CPU
compute_type = "int8"  # GPU: "float16" or "int8"; CPU: "int8"

# Install the default root handler and turn on debug-level output for the
# faster_whisper logger so the library's internal messages are visible.
logging.basicConfig()
logging.getLogger("faster_whisper").setLevel(logging.DEBUG)

# Build the shared model instance once at import time; transcribe() reuses it.
model = WhisperModel(
    model_size,
    device=device,
    compute_type=compute_type,
)
def format_timestamp(seconds):
    """Convert a duration in seconds to an ``HH:MM:SS.mmm`` string.

    The value is rounded to the nearest millisecond *before* it is split
    into fields. Splitting first and rounding only the seconds field lets a
    value such as 59.9996 render as ``00:00:60.000``; rounding first carries
    the overflow into the minutes (and hours) fields instead.

    Args:
        seconds: Duration in seconds (int or float).

    Returns:
        The formatted timestamp, e.g. ``"01:01:01.500"`` for ``3661.5``.
    """
    # Work in integer milliseconds so every field is exact from here on.
    total_ms = int(round(seconds * 1000))
    hours, rest_ms = divmod(total_ms, 3_600_000)
    minutes, rest_ms = divmod(rest_ms, 60_000)
    secs, millis = divmod(rest_ms, 1000)
    return f"{hours:02d}:{minutes:02d}:{secs:02d}.{millis:03d}"
def transcribe(audio_file):
    """Transcribe an audio file and return one timestamped line per segment.

    Args:
        audio_file: Path to the audio file to transcribe. Gradio supplies
            ``None`` when the form is submitted without an upload.

    Returns:
        A newline-joined string in which every line has the form
        ``[HH:MM:SS.mmm -> HH:MM:SS.mmm] text``.

    Raises:
        gr.Error: If no audio file was provided.
    """
    # Fail early with a readable message instead of handing an empty value
    # to the model and surfacing an opaque library error in the UI.
    if not audio_file:
        raise gr.Error("Please upload an audio file before submitting.")

    # The second return value (transcription info) is not needed here.
    segments, _ = model.transcribe(audio_file)

    # Render each segment with its start and end times.
    transcription_with_timestamps = [
        f"[{format_timestamp(segment.start)} -> {format_timestamp(segment.end)}] {segment.text}"
        for segment in segments
    ]
    return "\n".join(transcription_with_timestamps)
# Gradio UI: a single audio upload (passed to transcribe() as a file path)
# feeding a plain-text output with the timestamped transcription.
iface = gr.Interface(
    title="Whisper Transcription with Enhanced Timestamps",
    description=(
        "Upload an audio file to get transcription with enhanced timestamps "
        "in HH:MM:SS.mmm format using Faster Whisper."
    ),
    fn=transcribe,
    inputs=gr.Audio(label="Upload Audio", type="filepath", sources="upload"),
    outputs="text",
)

# Start the web server only when this file is executed as a script.
if __name__ == "__main__":
    iface.launch()