import gradio as gr
from transformers import pipeline
import torch
import spaces

# Load the Whisper ASR pipeline in half precision on the GPU.
pipe = pipeline(
    "automatic-speech-recognition",
    model="openai/whisper-large-v3-turbo",
    torch_dtype=torch.float16,
    device="cuda:0",
)


@spaces.GPU  # Request a ZeroGPU allocation for the duration of this call.
def transcribe(audio, task):
    gr.Info("Starting transcription task")
    # Split long audio into 30-second chunks and batch them for throughput;
    # return word-level timestamps alongside the transcribed text.
    outputs = pipe(
        audio,
        chunk_length_s=30,
        batch_size=128,
        generate_kwargs={"task": task},
        return_timestamps="word",
    )
    gr.Info("Finished transcription task")
    return outputs["chunks"]


with gr.Blocks() as demo:
    audio = gr.Audio(label="Audio", type="filepath", interactive=True)
    task = gr.Radio(
        ["transcribe", "translate"], label="Task", value="transcribe", interactive=True
    )
    btn = gr.Button("Transcribe", variant="primary")
    output = gr.Textbox(label="Transcription", interactive=False)
    btn.click(transcribe, inputs=[audio, task], outputs=output)

demo.queue().launch()