ASR-app-pro / app.py
matteocirca's picture
Update app
e8aa27e
raw
history blame
1.23 kB
from transformers import pipeline
import gradio as gr
from transformers import WhisperForConditionalGeneration
# Pipeline built from the fine-tuned "matteocirca/whisper-small-it" checkpoint;
# created once at import time and called by audio2segments.
pipe = pipeline(model="matteocirca/whisper-small-it")
# Disabled alternatives kept for reference: the stock OpenAI checkpoint and
# loading the model class directly instead of going through a pipeline.
# pipe = pipeline(model="openai/whisper-small")
# model = WhisperForConditionalGeneration.from_pretrained("matteocirca/whisper-small-it")
# Module-level list declared `global` by the functions below; starts empty.
segments = []
def audio2segments(audio, word):
    """Return the time ranges of the transcript chunks that contain ``word``.

    The recording is transcribed with the module-level ``pipe`` using
    ``return_timestamps=True``. The timestamped chunks are remembered together
    with the audio path they came from, so looking up another word in the same
    recording does not run the model again, while a new recording is always
    transcribed afresh.

    Args:
        audio: Path of the recorded audio file, or ``None``/empty when nothing
            was recorded.
        word: Word to look for. Matching ignores case and any punctuation
            attached to the start or end of a token.

    Returns:
        ``"No word detected"`` when ``word`` is missing or only punctuation,
        ``"No audio detected"`` when ``audio`` is missing; otherwise one
        ``"<index>) <start>-<end>"`` entry per matching chunk (an empty string
        when no chunk contains the word).
    """
    import string  # local import keeps this block self-contained

    global segments

    # Validate the cheap inputs first so the model is never run for nothing.
    target = word.strip().strip(string.punctuation).casefold() if word else ""
    if not target:
        return "No word detected"
    if not audio:
        return "No audio detected"

    # Re-transcribe only when a different recording is submitted; a single
    # never-invalidated cache would answer with a stale transcript.
    cached_audio, chunks = getattr(audio2segments, "_cache", (None, []))
    if cached_audio != audio:
        output = pipe(audio, return_timestamps=True)
        chunks = output.get("chunks") or []
        audio2segments._cache = (audio, chunks)
    # Keep the module-level name in sync for any other code that reads it.
    segments = chunks

    ranges = []
    for chunk in chunks:
        # Detach commas glued between words ("a,b") before tokenising.
        tokens = chunk["text"].replace(",", " , ").split()
        if any(tok.strip(string.punctuation).casefold() == target for tok in tokens):
            ranges.append(chunk["timestamp"])

    return "".join(
        f"{i}) {start}-{end}\n " for i, (start, end) in enumerate(ranges)
    )
def find_segment():
    """Unimplemented placeholder: performs no work and returns ``None``."""
    # NOTE(review): nothing in the visible code calls this function;
    # implement it or remove it.
    return None
# Gradio UI: a microphone recording (handed to the callback as a file path)
# and a free-text box are passed to audio2segments; its return value is
# rendered as text.
iface = gr.Interface(
    title="Whisper Small Italian",
    description="Realtime demo for Italian speech recognition using a fine-tuned Whisper small model.",
    fn=audio2segments,
    inputs=[
        gr.Audio(sources=["microphone"], type="filepath"),
        "text",
    ],
    outputs="text",
)

# Launched at module level (there is no __main__ guard in this file).
iface.launch()