|
import os
import subprocess
import sys

# Install openai-whisper from GitHub at startup.
# NOTE(review): installing dependencies at import time is fragile — prefer a
# requirements file / packaged environment. Using subprocess with an argument
# list (instead of os.system with a shell string) avoids shell interpretation
# and raises immediately if the install fails, rather than silently ignoring
# a non-zero exit status.
subprocess.check_call(
    [sys.executable, "-m", "pip", "install",
     "git+https://github.com/openai/whisper.git"]
)

import gradio

import whisper

# Load the multilingual "base" Whisper checkpoint once, at module import,
# so every transcription request reuses the same model instance.
model = whisper.load_model("base")
|
|
|
def transcribe_audio(audio):
    """Transcribe a recorded audio file to text with Whisper.

    Parameters
    ----------
    audio : str
        Path to the recording on disk (the Gradio Audio component is
        configured with type="filepath", so it passes a filepath here).

    Returns
    -------
    str
        The decoded transcript.
    """
    # Load the waveform and pad/trim it to the fixed 30-second window that
    # Whisper decodes in a single pass.
    waveform = whisper.pad_or_trim(whisper.load_audio(audio))

    # Log-Mel spectrogram, moved to whatever device the model lives on.
    mel = whisper.log_mel_spectrogram(waveform).to(model.device)

    # Fix: the original ran detect_language but discarded its result,
    # wasting a forward pass. Use the most probable language to steer
    # decoding instead.
    _, probs = model.detect_language(mel)
    language = max(probs, key=probs.get)

    # fp16=False keeps decoding in float32 so it also works on CPU.
    options = whisper.DecodingOptions(language=language, fp16=False)
    result = whisper.decode(model, mel, options)

    return result.text
|
|
|
# UI strings shown on the Gradio page header.
title = "Automatic Speech Recognition"

description = "Speech to Text Conversion using whisper"

# Microphone input; type="filepath" makes Gradio hand transcribe_audio a
# path on disk rather than raw sample data.
# NOTE(review): `source=` was renamed to `sources=[...]` in Gradio 4.x —
# confirm the pinned Gradio version still accepts this keyword.
in_prompt = gradio.components.Audio(source="microphone", type="filepath")

# Plain textbox that displays the returned transcript.
out_response = gradio.components.Textbox(label='Text')
|
|
|
|
|
# Wire the microphone input to the transcriber. live=True re-runs the
# function whenever the recording changes, instead of waiting for a
# submit click.
interface_config = dict(
    fn=transcribe_audio,
    inputs=in_prompt,
    outputs=out_response,
    title=title,
    description=description,
    live=True,
)

iface = gradio.Interface(**interface_config)

# debug=True surfaces server-side tracebacks in the console while the
# app is running.
iface.launch(debug=True)