"""Gradio demo: transcribe or translate microphone audio with OpenAI Whisper.

Records a clip from the microphone and runs it through the
``openai/whisper-medium`` checkpoint via the HF ``automatic-speech-recognition``
pipeline, forcing the decoder to the user-selected language and task.
"""

from transformers import pipeline, WhisperProcessor
import gradio as gr
import time  # NOTE(review): unused in this chunk — kept in case another part of the file relies on it

checkpoint_name = "openai/whisper-medium"

# Loaded once at import time; the pipeline holds the model weights.
p = pipeline("automatic-speech-recognition", model=checkpoint_name)
processor = WhisperProcessor.from_pretrained(checkpoint_name)


def transcribe(audio, language, task):
    """Return the transcription (or English translation) of *audio*.

    Parameters
    ----------
    audio : str | None
        Filesystem path to the recorded clip (the Audio input uses
        ``type="filepath"``), or ``None``/"" when nothing was recorded.
    language : str
        Source-language name understood by Whisper (e.g. ``'english'``).
    task : str
        Either ``'transcribe'`` or ``'translate'`` (translate targets English).

    Returns
    -------
    str
        The recognized text, or ``""`` when no audio was provided.
    """
    # Guard clause: with no recording there is nothing to decode, so skip
    # building the decoder prompt ids entirely (the original computed them
    # unconditionally and then discarded the work).
    if not audio:
        return ""
    forced_decoder_ids = processor.get_decoder_prompt_ids(language=language, task=task)
    return p(audio, generate_kwargs={"forced_decoder_ids": forced_decoder_ids})["text"]


# Module-level launch (unchanged side effect): running this file starts the UI.
gr.Interface(
    fn=transcribe,
    inputs=[
        gr.Audio(source="microphone", type="filepath"),
        gr.Radio(
            label="language",
            choices=['english', 'german', 'spanish', 'italian', 'french', 'polish'],
            value='english',
        ),
        gr.Radio(label="task", choices=['transcribe', 'translate'], value='transcribe'),
    ],
    outputs=["text"],
).launch()