File size: 2,165 Bytes
6c226f9
 
 
8e787d3
6c226f9
d790c0b
 
 
88183ad
6c226f9
a5bfe25
9d6fa91
66efbc3
6c226f9
 
 
 
 
 
 
 
 
 
3c0cd8e
 
bab1585
 
6c226f9
3c0cd8e
 
6c226f9
 
 
bab1585
 
 
6c226f9
bab1585
6c226f9
 
bab1585
3c0cd8e
 
 
 
 
bab1585
 
3c0cd8e
 
 
bab1585
3c0cd8e
 
bab1585
609dcbe
6c226f9
 
 
 
a5bfe25
bab1585
6c226f9
 
 
 
bab1585
6c226f9
7097513
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
import torch

import gradio as gr
import yt_dlp as youtube_dl
from transformers import pipeline
from transformers.pipelines.audio_utils import ffmpeg_read

import tempfile
import os

# Checkpoint loaded by the demo and the batch size passed to the pipeline
# on every transcription request.
MODEL_NAME = "openai/whisper-large-v3"
BATCH_SIZE = 8
# NOTE(review): not referenced anywhere in the visible code; kept in case
# other modules read it.
FILE_LIMIT_MB = 1000

# Pick device index 0 when CUDA is available, otherwise fall back to CPU.
if torch.cuda.is_available():
    device = 0
else:
    device = "cpu"

# Single shared speech-recognition pipeline, built once at import time.
# Audio is processed in 30-second chunks.
pipe = pipeline(
    "automatic-speech-recognition",
    model=MODEL_NAME,
    device=device,
    chunk_length_s=30,
)

def transcribe(inputs, task):
    """Run the shared Whisper pipeline on one audio input and return its text.

    Args:
        inputs: Audio to process. The Gradio audio components in this file
            are configured with ``type="filepath"``, so this is a file path,
            or ``None`` when the user submitted without providing audio.
        task: Generation task forwarded to the model; the UI offers
            ``"transcribe"`` and ``"translate"``.

    Returns:
        The ``"text"`` field of the pipeline output.

    Raises:
        gr.Error: If ``inputs`` is ``None``; the message is shown to the user.
    """
    if inputs is None:
        # User-facing message (Catalan): no audio file was provided.
        raise gr.Error(
            "Cap fitxer d'àudio introduit! Si us plau pengeu un fitxer "
            "o enregistreu un àudio abans d'enviar la vostra sol·licitud"
        )

    # Timestamps are requested from the pipeline, but only the joined text
    # is handed back to the UI.
    result = pipe(
        inputs,
        batch_size=BATCH_SIZE,
        generate_kwargs={"task": task},
        return_timestamps=True,
    )
    return result["text"]


# Top-level container that hosts the tabbed demo.
demo = gr.Blocks()

# Description shown on both tabs (Catalan); it embeds a Markdown link to the
# model card of the checkpoint in use.
description_string = (
    "Transcripció automatica de micròfon o de fitxers d'audio.\n Aquest demostrador está desenvolupat per"
    " comprovar els models de reconeixement de parla pels móbils. Per ara utilitza el checkpoint "
    f"[{MODEL_NAME}](https://huggingface.co/{MODEL_NAME}) i la llibreria de 🤗 Transformers per la transcripció."
)

# NOTE(review): `gr.inputs.*`, `source=`, `optional=`, `layout=`, `theme="huggingface"`
# and `launch(enable_queue=...)` look like the older Gradio API — confirm the
# pinned Gradio version before upgrading, as newer releases changed these.

# Tab 1: transcribe or translate an uploaded audio file.
file_transcribe = gr.Interface(
    fn=transcribe,
    inputs=[
        gr.inputs.Audio(source="upload", type="filepath", optional=True, label="Audio file"),
        gr.inputs.Radio(["transcribe", "translate"], label="Task", default="transcribe"),
    ],
    outputs="text",
    layout="horizontal",
    theme="huggingface",
    title="Transcriure Àudio",
    description=description_string,
    allow_flagging="never",
)

# Tab 2: transcribe or translate a microphone recording.
mf_transcribe = gr.Interface(
    fn=transcribe,
    inputs=[
        gr.inputs.Audio(source="microphone", type="filepath", optional=True),
        gr.inputs.Radio(["transcribe", "translate"], label="Task", default="transcribe"),
    ],
    outputs="text",
    layout="horizontal",
    theme="huggingface",
    title="Whisper Large V3: Transcribe Audio",
    description=description_string,
    allow_flagging="never",
)

# Present both interfaces as tabs inside the Blocks container.
with demo:
    gr.TabbedInterface([file_transcribe, mf_transcribe], ["Fitxer d'Àudio", "Micròfon"])

demo.launch(enable_queue=True)