Spaces:
Runtime error
Runtime error
File size: 1,564 Bytes
c1701de e9de351 c1701de e9de351 c1701de e9de351 c1701de e9de351 c1701de e9de351 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 |
from transformers import pipeline
import gradio as gr
import librosa
import torch
if torch.cuda.is_available():
device = torch.device("cuda")
elif (
hasattr(torch.backends, "mps")
and torch.backends.mps.is_available()
and torch.backends.mps.is_built()
):
device = torch.device("mps")
else:
device = torch.device("cpu")
# Whisper ASR pipelines, one per selectable model size.
# BUG FIX: ``model`` must be a full Hugging Face repo id. The bare names
# "base"/"tiny" do not resolve to any model repository and crash the app
# at startup; the tokenizer arguments already used the correct ids.
pipe1 = pipeline(
    "automatic-speech-recognition",
    model="openai/whisper-base",
    tokenizer="openai/whisper-base",
    chunk_length_s=26,  # long-form audio is chunked into 26 s windows
    device=device,
    stride_length_s=(4, 2),  # overlap (left, right) between chunks
)
pipe2 = pipeline(
    "automatic-speech-recognition",
    model="openai/whisper-tiny",
    tokenizer="openai/whisper-tiny",
    chunk_length_s=26,
    device=device,
    stride_length_s=(4, 2),
)
def transcribe(audio, x, model):
    """Transcribe speech from a recorded or uploaded audio file.

    Parameters
    ----------
    audio : str | None
        Filepath of the microphone recording, or ``None`` if none was made.
    x : str | None
        Filepath of an uploaded audio file, used when ``audio`` is ``None``.
    model : str
        Model choice from the dropdown: ``"base"`` or ``"tiny"``.

    Returns
    -------
    str
        The recognized text.

    Raises
    ------
    ValueError
        If ``model`` is not a known choice (previously this fell through
        and raised an opaque ``NameError`` on the unset result variable).
    """
    # Prefer the microphone recording; fall back to the uploaded file.
    # Identity test ``is None`` replaces the non-idiomatic ``== None``.
    path = audio if audio is not None else x
    # librosa.load returns (samples, sr); Whisper expects 16 kHz mono floats.
    sample = librosa.load(path, sr=16_000, mono=True)[0]
    if model == "base":
        return pipe1(sample, batch_size=8)["text"]
    if model == "tiny":
        return pipe2(sample, batch_size=8)["text"]
    raise ValueError(f"unknown model choice: {model!r}")
# Assemble the Gradio UI: mic or uploaded audio in, transcription text out.
# The input order must match transcribe(audio, x, model).
_mic_input = gr.Audio(source="microphone", type="filepath")
_upload_input = gr.Audio(source="upload", type="filepath")
_model_choice = gr.Dropdown(
    choices=["base", "tiny"],
    info="model k wuzwolenju",
    value="base",
)

iface = gr.Interface(
    fn=transcribe,
    inputs=[_mic_input, _upload_input, _model_choice],
    outputs="text",
    title="Serbski STT",
    description="Gradio demo za spóznawanje rěće w hornjoserbšćinje",
)
iface.launch(debug=True)
|