"""Gradio demo: upload an audio file and transcribe it through a hosted Whisper endpoint."""

import os

import gradio as gr
import requests

# NOTE(review): the trailing "/whisper" path segment is kept exactly as in the
# original; Hugging Face Inference API URLs normally end at the model id
# (".../models/openai/whisper-large-v2") -- confirm this endpoint resolves.
API_URL = "https://api-inference.huggingface.co/models/openai/whisper-large-v2/whisper"

# SECURITY: a credential was committed to source. Prefer supplying it through
# the HF_API_KEY environment variable; the literal fallback is retained only so
# existing deployments keep working, and the key should be rotated and removed.
API_KEY = os.environ.get("HF_API_KEY", "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")
HEADERS = {"Authorization": f"Bearer {API_KEY}"}

# Upper bound (seconds) on the HTTP call so a stalled connection cannot hang
# the UI callback forever. Generous because large-model inference can be slow.
REQUEST_TIMEOUT_SECONDS = 120


def transcribe_audio(audio_path: str) -> str:
    """Send the audio file at *audio_path* to the API and return its transcript.

    Args:
        audio_path: Filesystem path of the audio file to transcribe. A falsy
            value (``None`` or ``""``) is treated as "no audio yet"; this is
            presumably what Gradio passes when the input is empty or cleared
            while ``live=True``.

    Returns:
        The value of the ``"text"`` field of the API's JSON response, or an
        empty string when no audio path was supplied.

    Raises:
        OSError: If the audio file cannot be opened or read.
        requests.RequestException: If the HTTP request fails or times out.
        RuntimeError: If the response is not JSON or has no ``"text"`` field
            (for example, an error payload from the service).
    """
    if not audio_path:
        return ""

    with open(audio_path, "rb") as audio_file:
        audio_data = audio_file.read()

    # The raw file bytes are posted as the request body.
    response = requests.post(
        API_URL,
        headers=HEADERS,
        data=audio_data,
        timeout=REQUEST_TIMEOUT_SECONDS,
    )

    try:
        result = response.json()
    except ValueError as err:  # requests' JSON decode error subclasses ValueError
        raise RuntimeError(
            f"Transcription API returned a non-JSON response "
            f"(HTTP {response.status_code})."
        ) from err

    # Surface the service's own error detail instead of a bare KeyError.
    if not isinstance(result, dict) or "text" not in result:
        detail = result.get("error", result) if isinstance(result, dict) else result
        raise RuntimeError(
            f"Transcription failed (HTTP {response.status_code}): {detail}"
        )

    return result["text"]


# NOTE(review): gr.inputs / gr.outputs and capture_session are legacy Gradio
# APIs; they are left untouched because the installed Gradio version is not
# visible from this file -- confirm before upgrading Gradio.
audio_input = gr.inputs.Audio(type="filepath")
text_output = gr.outputs.Textbox()

iface = gr.Interface(
    fn=transcribe_audio,
    inputs=audio_input,
    outputs=text_output,
    title="Speech-to-Text using Whisper v2",
    description="Upload an audio file to transcribe it to text.",
    theme="Monochrome",
    live=True,
    capture_session=True,
)

if __name__ == "__main__":
    # Launch only when run as a script so importing this module has no
    # server-starting side effect.
    iface.launch()