import gradio as gr
from faster_whisper import WhisperModel

# Load and initialize a Whisper model for CPU inference with int8 quantization
def load_model(model_size):
    model = WhisperModel(model_size, device="cpu", compute_type="int8")
    return model

# Streaming transcription function: a generator, so Gradio updates the outputs
# incrementally while the audio is still being transcribed
def transcribe_audio(model_size, audio_file):
    # Initialize the model with the given size
    model = load_model(model_size)

    # faster-whisper returns a lazy iterator of segments plus metadata about the audio
    segments, info = model.transcribe(audio_file, beam_size=5)
    language_info = f"Detected language: {info.language} (Probability: {info.language_probability:.2f})"

    # Yield the detected language before any text has been transcribed
    transcribed_text = ""
    yield language_info, transcribed_text

    # Yield each segment as it is decoded, re-emitting the language info so the
    # "Detected Language" textbox is not cleared by later updates
    for segment in segments:
        transcribed_text += segment.text + " "
        yield language_info, transcribed_text.strip()

# Define the Gradio interface
interface = gr.Interface(
    fn=transcribe_audio,  # Streaming transcription function defined above
    inputs=[
        gr.Textbox(label="Model Size (e.g., 'large-v3', 'medium', 'small')", value="large-v3"),  # Input for model size
        gr.Audio(label="Audio File", type="filepath")  # Upload an audio file
    ],
    outputs=[
        gr.Textbox(label="Detected Language"),
        gr.Textbox(label="Transcription")
    ],  # Outputs: detected language and the (streaming) transcription
    title="Whisper Transcription App",
    description="Upload an audio file and specify the model size to transcribe it with faster-whisper's WhisperModel."
)

# Launch the app
if __name__ == "__main__":
    interface.launch()