import os

# Install Whisper from source (runtime pip installs like this are common in notebook demos)
os.system("pip install git+https://github.com/openai/whisper.git")

import gradio
import whisper

model = whisper.load_model("base")


def transcribe_audio(audio):
    # Load the audio and pad/trim it to fit 30 seconds
    audio = whisper.load_audio(audio)
    audio = whisper.pad_or_trim(audio)

    # Make a log-Mel spectrogram and move it to the same device as the model
    mel = whisper.log_mel_spectrogram(audio).to(model.device)

    # Detect the spoken language
    _, probs = model.detect_language(mel)
    print(f"Detected language: {max(probs, key=probs.get)}")

    # Decode the audio (fp16=False so the demo also runs on CPU)
    options = whisper.DecodingOptions(fp16=False)
    result = whisper.decode(model, mel, options)
    return result.text


title = "Automatic Speech Recognition"
description = "Speech to Text Conversion using Whisper"

# Input from user
in_prompt = gradio.components.Audio(source="microphone", type="filepath")

# Output response
out_response = gradio.components.Textbox(label="Text")

# Gradio interface to generate the UI link
iface = gradio.Interface(
    fn=transcribe_audio,
    inputs=in_prompt,
    outputs=out_response,
    title=title,
    description=description,
    live=True,
)

iface.launch(debug=True)
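# Compatibility note (my assumption, not part of the original script): the
# `source="microphone"` keyword above matches the Gradio 3.x API. Gradio 4.x
# renamed it to `sources` and expects a list, so on a newer install the Audio
# component would instead be constructed as:
#
#     in_prompt = gradio.components.Audio(sources=["microphone"], type="filepath")
#
# To sanity-check the transcription function without launching the UI, you can
# call it directly on a local audio file (hypothetical path shown):
#
#     print(transcribe_audio("sample.wav"))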