import gradio as gr
from faster_whisper import WhisperModel


# Function to load and initialize the Whisper model
def load_model(model_size):
    model = WhisperModel(model_size, device="cpu", compute_type="int8")
    return model
# Streaming transcription function (a generator, so Gradio updates the outputs as segments arrive)
def transcribe_audio(model_size, audio_file):
    # Initialize the model with the given size
    model = load_model(model_size)

    # Stream the transcription of the audio file
    transcribed_text = ""
    segments, info = model.transcribe(audio_file, beam_size=5)

    # Yield the detected language information first, before any text is available
    language_info = f"Detected language: {info.language} (Probability: {info.language_probability:.2f})"
    yield language_info, transcribed_text

    # Then yield the growing transcription as each segment is processed,
    # re-emitting the language info so its textbox is not cleared by later updates
    for segment in segments:
        transcribed_text += segment.text + " "
        yield language_info, transcribed_text.strip()
# Define the Gradio interface
interface = gr.Interface(
    fn=transcribe_audio,  # Function to transcribe audio
    inputs=[
        gr.Textbox(label="Model Size (e.g., 'large-v3', 'medium', 'small')", value="large-v3"),  # Input for model size
        gr.Audio(type="filepath"),  # Upload audio file
    ],
    outputs=[
        gr.Textbox(label="Detected Language"),
        gr.Textbox(label="Transcription"),
    ],  # Output language and transcription
    title="Whisper Transcription App",
    description="Upload an audio file and specify the model size to transcribe it using WhisperModel.",
)
# Launch the app
if __name__ == "__main__":
    interface.launch()
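
# To run this locally (an assumption about your setup; the file name app.py is
# hypothetical), install the two dependencies and start the script:
#
#   pip install gradio faster-whisper
#   python app.py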