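"""Gradio demo: speech recognition with OpenAI Whisper via the Hugging Face Inference API.

Record audio from the microphone, pick a Whisper checkpoint, and the recording is POSTed
to the hosted Inference API for transcription. A second textbox logs the Audio component's
events (record / play / pause / stop) as they fire.
"""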
import json
import time
import traceback
from datetime import datetime

import gradio as gr
import requests

API_URL = "https://api-inference.huggingface.co/models/"


def date_now():
    return datetime.now().strftime("%Y-%m-%d %H:%M:%S")


def record_opt(msg):
    return f"{date_now()} {msg}\n"
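
# speech_recognize is a generator: it first yields a placeholder message so the UI updates
# right away, then yields the final transcription (or the traceback if the request failed).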
def speech_recognize(audio, model_name, hf_token, opt):
    opt += record_opt("Transcription started ...")
    yield "Transcribing, please wait ...", opt
    start = time.monotonic()
    with open(audio, "rb") as f:
        data = f.read()
    try:
        url = API_URL + model_name
        print(f">>> url is {url}")
        headers = {"Authorization": f"Bearer {hf_token}"}
        response = requests.post(url, headers=headers, data=data)
        result = json.loads(response.content.decode("utf-8"))
        print(f">>> result is {result}")
        text = result["text"]
    except Exception:
        text = f"Transcription failed:\n{traceback.format_exc()}"
    cost = time.monotonic() - start
    opt += record_opt(f"Transcription finished, took {cost:.3f}s")
    yield text, opt


with gr.Blocks() as demo:
    gr.HTML("""<h2 align="center">Automatic Speech Recognition (OpenAI Whisper with Inference API)</h2>""")
    with gr.Row():
        gr.Markdown(
            """🤗 Calls the Hugging Face Inference API to run speech recognition, also known as speech-to-text (STT), with OpenAI Whisper models.
            👉 The goal is to practice the Gradio Audio component and explore the Hugging Face Inference API.
            """
        )
    with gr.Row():
        with gr.Column():
            audio = gr.Audio(source="microphone", type="filepath")
            model_name = gr.Dropdown(
                label="Model",
                choices=[
                    "openai/whisper-large-v2",
                    "openai/whisper-large",
                    "openai/whisper-medium",
                    "openai/whisper-small",
                    "openai/whisper-base",
                    "openai/whisper-tiny",
                ],
                value="openai/whisper-large-v2",
            )
            hf_token = gr.Textbox(label="Huggingface token")
        with gr.Column():
            output = gr.Textbox(label="Transcription result")
            operation = gr.Textbox(label="Component event history")
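
    # Every Audio event below only appends a timestamped entry to the operation-history box;
    # stop_recording is the event that actually runs the transcription.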
    audio.start_recording(
        lambda x: x + record_opt("Recording started ..."),
        inputs=operation, outputs=operation
    )
    audio.play(
        lambda x: x + record_opt("Playing recording"),
        inputs=operation, outputs=operation
    )
    audio.pause(
        lambda x: x + record_opt("Playback paused"),
        inputs=operation, outputs=operation
    )
    audio.stop(
        lambda x: x + record_opt("Playback stopped"),
        inputs=operation, outputs=operation
    )
    audio.end(
        lambda x: x + record_opt("Playback finished"),
        inputs=operation, outputs=operation
    )
    audio.stop_recording(
        speech_recognize,
        inputs=[audio, model_name, hf_token, operation],
        outputs=[output, operation]
    )
demo.queue(max_size=4, concurrency_count=4)
demo.launch()
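
# For reference, the same transcription can be requested outside Gradio with a plain HTTP call
# (a minimal sketch; "sample.wav" and the token value are placeholders):
#
#   import requests
#   with open("sample.wav", "rb") as f:
#       r = requests.post(
#           "https://api-inference.huggingface.co/models/openai/whisper-large-v2",
#           headers={"Authorization": "Bearer hf_xxx"},
#           data=f.read(),
#       )
#   print(r.json()["text"])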