File size: 2,049 Bytes
0e97cdb
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
import gradio as gr
from transformers import pipeline

# Task identifier shared by both speech-recognition pipelines.
ASR_TASK = "automatic-speech-recognition"

# One pipeline per language; both are built once at import time and reused
# by every request handled by the transcription callbacks.
pipe2 = pipeline(task=ASR_TASK, model="distil-whisper/distil-small.en")
pipe3 = pipeline(task=ASR_TASK, model="antony66/whisper-large-v3-russian")

# Top-level Blocks container that hosts the tabbed UI and is launched at the
# bottom of the file.
demo = gr.Blocks()


def transcribe_speech_english(filepath):
    """Transcribe an audio file with the English pipeline (``pipe2``).

    Args:
        filepath: Path to the recorded or uploaded audio file, as delivered
            by a ``gr.Audio`` component configured with ``type="filepath"``.
            ``None`` means no audio was provided.

    Returns:
        The transcribed text, or an empty string when ``filepath`` is
        ``None`` (a Gradio warning is shown to the user in that case).
    """
    if filepath is None:
        gr.Warning("No audio found, please retry.")
        return ""
    # Whisper-family models work on 30-second windows. Without chunking,
    # longer clips (typical for uploaded files) are cut off or rejected,
    # depending on the installed transformers version. Chunking lets the
    # pipeline split long audio and stitch the partial transcripts together.
    output = pipe2(filepath, chunk_length_s=30)
    return output["text"]


def transcribe_speech_russian(filepath):
    """Transcribe an audio file with the Russian pipeline (``pipe3``).

    Args:
        filepath: Path to the recorded or uploaded audio file, as delivered
            by a ``gr.Audio`` component configured with ``type="filepath"``.
            ``None`` means no audio was provided.

    Returns:
        The transcribed text, or an empty string when ``filepath`` is
        ``None`` (a Gradio warning is shown to the user in that case).
    """
    if filepath is None:
        gr.Warning("No audio found, please retry.")
        return ""
    # Whisper-family models work on 30-second windows. Without chunking,
    # longer clips (typical for uploaded files) are cut off or rejected,
    # depending on the installed transformers version. Chunking lets the
    # pipeline split long audio and stitch the partial transcripts together.
    output = pipe3(filepath, chunk_length_s=30)
    return output["text"]


def _build_transcription_tab(transcriber, audio_source):
    """Create one audio-in / text-out ``gr.Interface`` for a transcriber.

    Args:
        transcriber: Callback that receives the audio file path and returns
            the transcription text.
        audio_source: Value passed to ``gr.Audio(sources=...)``; this file
            uses ``"microphone"`` and ``"upload"``.

    Returns:
        A ``gr.Interface`` with flagging disabled.
    """
    audio_input = gr.Audio(sources=audio_source, type="filepath")
    text_output = gr.Textbox(label="Transcription", lines=3)
    return gr.Interface(
        fn=transcriber,
        inputs=audio_input,
        outputs=text_output,
        allow_flagging="never",
    )


# Microphone tabs.
mic_transcribe_english = _build_transcription_tab(
    transcribe_speech_english, "microphone"
)
mic_transcribe_russian = _build_transcription_tab(
    transcribe_speech_russian, "microphone"
)

# File-upload tabs.
file_transcribe_english = _build_transcription_tab(
    transcribe_speech_english, "upload"
)
file_transcribe_russian = _build_transcription_tab(
    transcribe_speech_russian, "upload"
)


# Tab title -> interface, listed in display order (dicts keep insertion order).
tab_layout = {
    "Transcribe Microphone English": mic_transcribe_english,
    "Transcribe Audio File English": file_transcribe_english,
    "Transcribe Microphone Russian": mic_transcribe_russian,
    "Transcribe Audio File Russian": file_transcribe_russian,
}

# Mount the four interfaces as tabs inside the shared Blocks container.
with demo:
    gr.TabbedInterface(list(tab_layout.values()), list(tab_layout.keys()))

# Start the Gradio server.
demo.launch()