Spaces:
Running
Running
File size: 1,644 Bytes
6c226f9 8e787d3 6c226f9 d790c0b 88183ad 6c226f9 a5bfe25 9d6fa91 66efbc3 6c226f9 3c0cd8e bab1585 6c226f9 5208902 3c0cd8e 6c226f9 13e0565 1faae08 13e0565 6c226f9 bab1585 6c226f9 5208902 3c0cd8e 53b8fc6 bab1585 3c0cd8e 6c226f9 9c4478d 6c226f9 5208902 7097513 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 |
import torch
import gradio as gr
import yt_dlp as youtube_dl
from transformers import pipeline
from transformers.pipelines.audio_utils import ffmpeg_read
import tempfile
import os
# Hugging Face checkpoint used for transcription.
MODEL_NAME = "openai/whisper-large-v3"
# Number of audio chunks batched per forward pass through the model.
BATCH_SIZE = 8
# Upload size cap in MB. NOTE(review): defined but not enforced anywhere in
# the visible code — confirm whether it is read elsewhere or is dead config.
FILE_LIMIT_MB = 1000
# Use the first CUDA GPU when available, otherwise run on CPU.
device = 0 if torch.cuda.is_available() else "cpu"
# ASR pipeline; chunk_length_s=30 enables chunked long-form transcription.
pipe = pipeline(
task="automatic-speech-recognition",
model=MODEL_NAME,
chunk_length_s=30,
device=device,
)
def transcribe(inputs, task):
    """Transcribe (or translate) an audio file with the Whisper pipeline.

    Args:
        inputs: Path to the uploaded/recorded audio file, or ``None`` when
            the user submitted without providing audio.
        task: Whisper generation task, ``"transcribe"`` or ``"translate"``.

    Returns:
        The recognized text for the whole audio input.

    Raises:
        gr.Error: If no audio file was provided (message shown in the UI).
    """
    # Guard clause: surface a user-facing error instead of crashing the pipeline.
    if inputs is None:
        raise gr.Error(
            "Cap fitxer d'脿udio introduit! Si us plau pengeu un fitxer "
            "o enregistreu un 脿udio abans d'enviar la vostra sol路licitud"
        )
    result = pipe(
        inputs,
        batch_size=BATCH_SIZE,
        generate_kwargs={"task": task},
        return_timestamps=True,
    )
    return result["text"]
# UI description shown under the app title (Catalan).
# NOTE(review): the text appears mojibake-encoded (e.g. "脿" where "à" is
# expected, "馃" for an emoji) — looks like a double-UTF-8 decode artifact;
# verify the source file's encoding before shipping. Left byte-identical here.
description_string = "Transcripci贸 autom脿tica de micr貌fon o de fitxers d'脿udio.\n Aquest demostrador s'ha desenvolupat per"\
" comprovar els models de reconeixement de parla per a m贸bils. Per ara utilitza el checkpoint "\
f"[{MODEL_NAME}](https://huggingface.co/{MODEL_NAME}) i la llibreria de 馃 Transformers per a la transcripci贸."
# Gradio interface: audio in (file upload or microphone), transcript text out.
audio_component = gr.Audio(sources=["upload", "microphone"], type="filepath", label="Audio")
task_component = gr.Radio(["transcribe", "translate"], label="Task", value="transcribe")

file_transcribe = gr.Interface(
    fn=transcribe,
    inputs=[audio_component, task_component],
    outputs="text",
    title="Transcripci贸 autom脿tica d'脿udio",
    description=description_string,
    allow_flagging="never",
)
# Wrap the single interface in a tabbed container; tab label is "Fitxer" ("File").
demo = gr.TabbedInterface([file_transcribe], ["Fitxer"])

# Launch the Gradio server only when executed as a script, not on import.
if __name__ == "__main__":
    demo.launch()
|