# Spaces: Sleeping
import string

import gradio as gr
from transformers import WhisperForConditionalGeneration
from transformers import pipeline
# Speech-recognition pipeline backed by the Italian fine-tuned Whisper
# checkpoint. To compare against the base model, swap the model id for
# "openai/whisper-small"; the same checkpoint can also be loaded directly
# with WhisperForConditionalGeneration.from_pretrained(...).
pipe = pipeline(
    task="automatic-speech-recognition",
    model="matteocirca/whisper-small-it",
)

# Module-level segment store shared with audio2segments() and find_segment()
# through their `global segments` declarations.
segments = []
def _chunks_to_segments(chunks):
    """Convert pipeline timestamp chunks into ``{'text', 'start', 'end'}`` dicts."""
    converted = []
    for chunk in chunks:
        # A chunk may lack a timestamp pair; fall back to unknown bounds.
        start, end = chunk.get("timestamp") or (None, None)
        converted.append(
            {"text": chunk.get("text", ""), "start": start, "end": end}
        )
    return converted


def _contains_word(text, word):
    """Return True when *word* is one of the whitespace-separated tokens of *text*.

    The comparison ignores case and any punctuation attached to either end of
    a token, so ``"stai"`` matches ``"stai?"`` and ``"ciao"`` matches
    ``"Ciao,"``.
    """
    strip_chars = string.punctuation + "«»“”…"
    wanted = word.strip(strip_chars).casefold()
    if not wanted:
        return False
    return any(
        token.strip(strip_chars).casefold() == wanted for token in text.split()
    )


def audio2segments(audio, word, transcriber=None):
    """Transcribe *audio* and list the time ranges of segments containing *word*.

    Args:
        audio: Path of the recorded audio file, or a falsy value when nothing
            was recorded.
        word: The word to look for in the transcription.
        transcriber: Optional callable used instead of the module-level
            ``pipe``. It is called as ``transcriber(audio,
            return_timestamps=True)`` and is expected to return a mapping
            with a ``"chunks"`` list whose items carry ``"text"`` and a
            ``"timestamp"`` ``(start, end)`` pair.

    Returns:
        ``"No audio detected"`` or ``"No word detected"`` when the respective
        input is missing; otherwise one ``"<index>) <start>-<end>"`` line per
        matching segment (an empty string when nothing matches).

    Side effects:
        Rebinds the module-level ``segments`` list to the segments of the
        latest transcription.
    """
    global segments

    # Validate the inputs before running the (expensive) transcription.
    if not audio:
        return "No audio detected"
    word = (word or "").strip()
    if not word:
        return "No word detected"

    asr = pipe if transcriber is None else transcriber
    # Timestamps must be requested explicitly; without them the output only
    # contains the full text and no per-segment time ranges.
    output = asr(audio, return_timestamps=True)

    # Re-transcribe on every call so results never come from a previous
    # recording, while still publishing the segments at module level.
    segments = _chunks_to_segments(output.get("chunks") or [])

    matches = [
        (segment["start"], segment["end"])
        for segment in segments
        if _contains_word(segment["text"], word)
    ]
    return "\n".join(
        f"{index}) {start}-{end}"
        for index, (start, end) in enumerate(matches)
    )
def find_segment():
    """Placeholder for a segment lookup; not implemented yet.

    The function takes no arguments and currently performs no work.

    Returns:
        None.
    """
    return None
# Gradio UI: a microphone recording (handed to the callback as a file path)
# and a text box go in, plain text comes out.
iface = gr.Interface(
    fn=audio2segments,
    inputs=[gr.Audio(sources=["microphone"], type="filepath"), "text"],
    outputs="text",
    title="Whisper Small Italian",
    description="Realtime demo for Italian speech recognition using a fine-tuned Whisper small model.",
)
iface.launch()