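# Gradio demo (Catalan UI): automatic speech recognition / translation with Whisper.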
import torch

import gradio as gr
import yt_dlp as youtube_dl
from transformers import pipeline
from transformers.pipelines.audio_utils import ffmpeg_read
from AinaTheme import theme

import tempfile
import os

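# Model checkpoint and inference settings (FILE_LIMIT_MB is defined but not enforced below).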
MODEL_NAME = "openai/whisper-large-v3"
BATCH_SIZE = 8
FILE_LIMIT_MB = 1000

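# Use the first CUDA GPU if available, otherwise fall back to CPU.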
device = 0 if torch.cuda.is_available() else "cpu"

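# Whisper ASR pipeline; audio longer than 30 seconds is processed in 30-second chunks.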
pipe = pipeline(
    task="automatic-speech-recognition",
    model=MODEL_NAME,
    chunk_length_s=30,
    device=device,
)

def transcribe(inputs, task):
    """Transcribe or translate an audio file with the Whisper ASR pipeline."""
    if inputs is None:
        # English: "No audio file provided! Please upload a file or record
        # audio before submitting your request."
        raise gr.Error("Cap fitxer d'àudio introduït! Si us plau, pengeu un fitxer "
                       "o enregistreu un àudio abans d'enviar la vostra sol·licitud")

    text = pipe(inputs, batch_size=BATCH_SIZE, generate_kwargs={"task": task}, return_timestamps=True)["text"]
    return text


# English: "Automatic transcription from the microphone or from audio files. This demo was developed
# to test speech recognition models for mobile devices; for now it uses the [MODEL_NAME] checkpoint
# and the 🤗 Transformers library for transcription."
description_string = "Transcripció automàtica de micròfon o de fitxers d'àudio.\n Aquest demostrador està desenvolupat per"\
              " comprovar els models de reconeixement de parla pels mòbils. Per ara utilitza el checkpoint "\
              f"[{MODEL_NAME}](https://huggingface.co/{MODEL_NAME}) i la llibreria de 🤗 Transformers per la transcripció."

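# Gradio interface: audio from upload or microphone, plus a transcribe/translate task selector.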
file_transcribe = gr.Interface(
    fn=transcribe,
    inputs=[
        gr.Audio(sources=["upload", "microphone"], type="filepath", label="Audio"),
        gr.Radio(["transcribe", "translate"], label="Task", value="transcribe"),
    ],
    outputs="text",
    title="Transcripció automàtica d'àudio",
    description=(description_string),
    allow_flagging="never",
)


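# Single-tab app ("Fitxer" = "File") styled with the Aina theme.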
demo = gr.TabbedInterface([file_transcribe], ["Fitxer"], theme=theme)

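# Running this file as a script launches a local Gradio server
# (Gradio's default address is http://127.0.0.1:7860 unless configured otherwise).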
if __name__ == "__main__":
    demo.launch()