|
import os

import gradio as gr
import requests
|
|
|
# Hugging Face Inference API endpoint that receives the raw audio bytes.
# NOTE(review): hosted-model endpoints normally end at the model id
# (".../models/openai/whisper-large-v2"); confirm that the trailing
# "/whisper" path segment is intentional.
API_URL = "https://api-inference.huggingface.co/models/openai/whisper-large-v2/whisper"

# The access token is read from the environment so that no credential lives
# in source control. Export HF_TOKEN before starting the app. Any token that
# was previously committed in this file should be treated as leaked and
# revoked.
API_KEY = os.environ.get("HF_TOKEN", "")

# Only attach an Authorization header when a token is configured; sending
# "Bearer " with an empty token would just produce an authentication error.
HEADERS = {"Authorization": f"Bearer {API_KEY}"} if API_KEY else {}
|
|
|
def transcribe_audio(audio_path: str) -> str:
    """Transcribe an audio file through the configured inference endpoint.

    The file is read in binary mode and its bytes are POSTed to ``API_URL``
    with ``HEADERS``; the ``"text"`` field of the JSON reply is returned.

    Args:
        audio_path: Path of the audio file to upload. ``None`` or an empty
            string (e.g. the live interface firing with no audio selected)
            yields an empty transcript without contacting the API.

    Returns:
        The transcribed text, or ``""`` when no audio path was supplied.

    Raises:
        OSError: If the audio file cannot be opened or read.
        requests.RequestException: On network failure or timeout.
        RuntimeError: If the API answers with an HTTP error status or with a
            payload that does not contain a ``"text"`` field.
    """
    # Nothing to transcribe: avoid passing None/"" to open().
    if not audio_path:
        return ""

    with open(audio_path, "rb") as audio_file:
        audio_bytes = audio_file.read()

    # Bound the wait so a stalled connection cannot hang the UI forever.
    response = requests.post(
        API_URL,
        headers=HEADERS,
        data=audio_bytes,
        timeout=120,
    )

    # The body is parsed before the status check because error replies may
    # carry a JSON "error" message that is more useful than the status code.
    try:
        payload = response.json()
    except ValueError:
        payload = None

    if not response.ok:
        detail = payload.get("error") if isinstance(payload, dict) else None
        raise RuntimeError(
            f"Transcription request failed (HTTP {response.status_code}): "
            f"{detail or response.text}"
        )

    if not isinstance(payload, dict) or "text" not in payload:
        raise RuntimeError(
            f"Unexpected transcription response: {response.text[:200]!r}"
        )

    return payload["text"]
|
|
|
# UI components. The audio widget is configured with type="filepath", so the
# handler receives a path on disk rather than raw samples.
# NOTE(review): the `gr.inputs` / `gr.outputs` namespaces and the
# `capture_session` argument below appear to be deprecated in newer Gradio
# releases — confirm against the Gradio version this app is pinned to.
audio_input = gr.inputs.Audio(type="filepath")
text_output = gr.outputs.Textbox()

# Wire the transcription handler to the components. live=True re-runs the
# handler whenever the input changes instead of waiting for a submit click.
iface = gr.Interface(
    fn=transcribe_audio,
    inputs=audio_input,
    outputs=text_output,
    title="Speech-to-Text using Whisper v2",
    description="Upload an audio file to transcribe it to text.",
    theme="Monochrome",
    live=True,
    capture_session=True,
)

# Start the web server only when executed as a script, so importing this
# module (tests, tooling) does not launch the app as a side effect.
if __name__ == "__main__":
    iface.launch()