# Sermas / app.py
import gradio as gr
import re
import subprocess
import math
import shutil
import soundfile as sf
import tempfile
import os
import requests
import time
def _return_yt_html_embed(yt_url):
    # Build an HTML iframe embed for a YouTube URL (helper, currently unused by the UI).
    video_id = yt_url.split("?v=")[-1]
    HTML_str = (
        f'<center> <iframe width="500" height="320" src="https://www.youtube.com/embed/{video_id}"> </iframe>'
        " </center>"
    )
    return HTML_str
def transcribe_base(audio, language):
    # Read the audio file and send it, with its sampling rate and the chosen
    # language, to the remote transcription API configured via the `api_url`
    # environment variable.
    start_time = time.time()
    d, sr = sf.read(audio)
    data = {'audio': d.tolist(),
            'sampling_rate': sr,
            'language': language}
    response = requests.post(os.getenv("api_url"), json=data).json()
    result = response["text"]
    end_time = time.time()
    # Log audio duration (seconds) and processing time for monitoring.
    print("-" * 50)
    print(len(data["audio"]) / float(sr))
    print(end_time - start_time)
    print("-" * 50)
    return result
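
# A minimal sketch (not wired into the UI) of how transcribe_base can be exercised
# directly for a quick manual check. "sample.wav" and the default language are
# hypothetical values; the `api_url` environment variable must point at the service.
def _smoke_test(wav_path="sample.wav", language="eu"):
    return transcribe_base(wav_path, language)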
def transcribe(audio_microphone, audio_upload, language):
    # Prefer the microphone recording; fall back to the uploaded file.
    print("Transcription request")
    print(audio_microphone, audio_upload, language)
    audio = audio_microphone if audio_microphone is not None else audio_upload
    return transcribe_base(audio, language)
demo = gr.Blocks()
with demo:
    gr.Markdown("# Speech recognition using Whisper models")
    gr.Markdown("Orai NLP Technologies")
    with gr.Tab("Transcribe Audio"):
        iface = gr.Interface(
            fn=transcribe,
            inputs=[
                gr.Audio(sources="microphone", type="filepath"),
                gr.Audio(sources="upload", type="filepath"),
                gr.Dropdown(choices=[("Basque", "eu"),
                                     ("Spanish", "es"),
                                     ("English", "en")],
                            # ("French", "fr"),
                            # ("Italian", "it"),
                            value="eu")
            ],
            outputs=[
                gr.Textbox(label="Transcription", autoscroll=False)
            ],
            allow_flagging="never",
        )
demo.queue(max_size=1)
demo.launch(share=False, max_threads=3, auth=(os.getenv("username"), os.getenv("password")), auth_message="Please provide a username and a password.")
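
# Configuration note: the app reads three environment variables, `api_url` (the
# transcription endpoint used by transcribe_base) plus `username` and `password`
# (HTTP auth for the Gradio UI). The values below are hypothetical examples:
#
#   export api_url="https://example.org/transcribe"
#   export username="demo"
#   export password="demo"
#   python app.py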