Transcriber / app.py
DrDemon's picture
Update app.py
58dc523 verified
import functools

import gradio as gr
from faster_whisper import WhisperModel
# Function to load and initialize the Whisper model
def load_model(model_size):
model = WhisperModel(model_size, device="cpu", compute_type="int8")
return model
# Streaming transcription function
def transcribe_audio(model_size, audio_file):
# Initialize the model with the given size
model = load_model(model_size)
# Stream the transcription of the audio file
transcribed_text = ""
segments, info = model.transcribe(audio_file, beam_size=5)
# Yield detected language information first
yield f"Detected language: {info.language} (Probability: {info.language_probability:.2f})", transcribed_text.strip()
# Then yield each segment of transcribed text as it is processed
for segment in segments:
transcribed_text += segment.text + " "
yield "", transcribed_text.strip() # Empty string for language, we only update transcription
# Define the Gradio interface
interface = gr.Interface(
fn=transcribe_audio, # Function to transcribe audio
inputs=[
gr.Textbox(label="Model Size (e.g., 'large-v3', 'medium', 'small')", value="large-v3"), # Input for model size
gr.Audio(type="filepath") # Upload audio file
],
outputs=[
gr.Textbox(label="Detected Language"),
gr.Textbox(label="Transcription")
], # Output language and transcription
title="Whisper Transcription App",
description="Upload an audio file and specify the model size to transcribe it using WhisperModel."
)
# Launch the app
if __name__ == "__main__":
interface.launch()