File size: 1,361 Bytes
c60e096
7752cd2
 
c60e096
7752cd2
 
 
c60e096
7752cd2
3a81031
af1960a
 
7752cd2
af1960a
c60e096
7752cd2
3a81031
 
7752cd2
3a81031
 
 
 
7752cd2
 
c60e096
7752cd2
c60e096
e508c61
c60e096
3a81031
 
c60e096
 
 
 
7752cd2
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
import gradio as gr
from faster_whisper import WhisperModel
import logging

# Configure logging for debugging purposes: enable faster_whisper's internal
# DEBUG output (segment decoding progress, etc.) on the root handler.
logging.basicConfig()
logging.getLogger("faster_whisper").setLevel(logging.DEBUG)

# Initialize the Whisper model with your desired configuration
model_size = "small"  # Choose the model size (e.g. tiny/base/small/medium/large)
device = "cpu"  # GPU : cuda  CPU : cpu
compute_type = "int8"  # GPU : float16 or int8 - CPU : int8

# Load the model once at import time; it is reused by every transcribe() call.
# NOTE(review): first run downloads the model weights — requires network access.
model = WhisperModel(model_size, device=device, compute_type=compute_type)

def transcribe(audio_file):
    """Transcribe an audio file and return text with per-segment timestamps.

    Args:
        audio_file: Filesystem path to the audio file (Gradio supplies a
            filepath because the input component uses type="filepath").

    Returns:
        A newline-separated string, one line per segment, each prefixed with
        its [start - end] time range in seconds (two decimal places).
    """
    # Segment-level transcription only (no word-level timestamps requested).
    segments, _ = model.transcribe(audio_file)

    lines = []
    for seg in segments:  # segments is a lazy generator; decoding happens here
        lines.append(f"[{seg.start:.2f}s - {seg.end:.2f}s] {seg.text}")

    return "\n".join(lines)

# Define the Gradio interface: one audio-upload input, plain-text output.
# Fix: pass `sources` as a list — the Gradio 4 Audio API declares
# `sources: list[Literal["upload", "microphone"]]`; a bare string only works
# through lenient coercion, so the list form matches the documented contract.
iface = gr.Interface(
    fn=transcribe,
    inputs=gr.Audio(sources=["upload"], type="filepath", label="Upload Audio"),
    outputs="text",
    title="Whisper Transcription with Line-by-Line Timestamps",
    description="Upload an audio file to get transcription with line-by-line timestamps using Faster Whisper.",
)

# Launch the app only when executed as a script (not when imported),
# so the module can be reused without starting the web server.
if __name__ == "__main__":
    iface.launch()