File size: 629 Bytes
d7aa11b
 
f3c7107
d7aa11b
 
 
 
 
f3c7107
d7aa11b
 
 
f3c7107
d7aa11b
 
f3c7107
d7aa11b
 
 
 
 
f3c7107
d7aa11b
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
import whisper
import gradio as gr

_MODEL_NAME = "base"  # Choose the appropriate model size
_MODEL_CACHE = {}


def _get_model(name=_MODEL_NAME):
    """Return the Whisper model called *name*, loading it on first use only.

    Loading a checkpoint is expensive, so the loaded model is kept in a
    module-level cache and reused by every later request.
    """
    model = _MODEL_CACHE.get(name)
    if model is None:
        model = _MODEL_CACHE[name] = whisper.load_model(name)
    return model


def transcribe_audio(file_info):
    """Transcribe a recorded audio file to text with Whisper.

    Args:
        file_info: File-like object whose ``name`` attribute is the path of
            the recording, or ``None`` when nothing was recorded.

    Returns:
        The transcribed text, or an empty string when no recording was given.
    """
    # Nothing was recorded/submitted: there is no file to read.
    if file_info is None:
        return ""

    model = _get_model()
    audio = whisper.load_audio(file_info.name)

    # Language detection runs on a single fixed-length window, so only the
    # padded/trimmed copy is converted to a spectrogram here; the full
    # waveform is kept for the actual transcription below.
    window = whisper.pad_or_trim(audio)
    mel = whisper.log_mel_spectrogram(window).to(model.device)

    _, probs = model.detect_language(mel)
    language = max(probs, key=probs.get)
    print(f"Detected language: {language}")

    # transcribe() expects a path or raw waveform (it computes spectrograms
    # itself), so pass the untrimmed audio rather than the mel tensor, and
    # reuse the language detected above.
    result = model.transcribe(audio, language=language)
    return result["text"]

# Input component: audio captured from the microphone, handed to the
# callback as a file object (the callback reads its ``.name`` path).
mic_input = gr.inputs.Audio(source="microphone", type="file")

# Wire the transcription callback to the microphone input and a text output.
iface = gr.Interface(fn=transcribe_audio, inputs=mic_input, outputs="text")

# Start the web UI.
iface.launch()