0xharib committed on
Commit
2ae2448
·
1 Parent(s): 8d38f83

Create new file

Browse files
Files changed (1) hide show
  1. app.py +164 -0
app.py ADDED
@@ -0,0 +1,164 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import os
import subprocess
import sys

# Whisper is installed at startup, before it is imported below.
# Invoking pip through the running interpreter (``sys.executable -m pip``)
# installs into the environment this script actually runs in, and passing an
# argument list avoids going through a shell.
# check=False keeps the install best-effort: if it fails, the
# ``import whisper`` below decides whether the app can start.
subprocess.run(
    [
        sys.executable,
        "-m",
        "pip",
        "install",
        "git+https://github.com/openai/whisper.git",
    ],
    check=False,
)

import gradio as gr
import whisper

# Loaded once at import time and shared by every transcription request.
model = whisper.load_model("small")
7
+
8
def transcription(audio):
    """Transcribe an audio file with the module-level Whisper ``model``.

    Args:
        audio: Path of the audio file to transcribe (the Audio input in this
            file is declared with ``type="filepath"``). May be ``None`` or
            empty when the user triggers a transcription without providing
            any audio.

    Returns:
        The ``'text'`` entry of the result returned by ``model.transcribe``,
        or an empty string when no audio was supplied.
    """
    # Nothing was uploaded: return an empty transcript instead of handing
    # None to the model, which would fail.
    if not audio:
        return ""

    result = model.transcribe(audio)
    return result["text"]
11
+
12
+
13
# Page title and description text. NOTE(review): neither name is referenced
# by the rest of the visible file (the header HTML repeats the text inline).
title = "Whisper Demo with File Uploads - Using Small Model"

description = "Whisper is a general-purpose speech recognition model. It is trained on a large dataset of diverse audio and is also a multi-task model that can perform multilingual speech recognition as well as speech translation and language identification."

# Custom stylesheet passed to gr.Blocks(css=...).
# The ring-shadow width is written as ``calc(3px + var(...))``: CSS calc()
# requires an explicit operator between its operands, and without it the
# whole custom property value is invalid.
css = """
        .gradio-container {
            font-family: 'IBM Plex Sans', sans-serif;
        }
        .gr-button {
            color: white;
            border-color: black;
            background: black;
        }
        input[type='range'] {
            accent-color: black;
        }
        .dark input[type='range'] {
            accent-color: #dfdfdf;
        }
        .container {
            max-width: 730px;
            margin: auto;
            padding-top: 1.5rem;
        }

        .details:hover {
            text-decoration: underline;
        }
        .gr-button {
            white-space: nowrap;
        }
        .gr-button:focus {
            border-color: rgb(147 197 253 / var(--tw-border-opacity));
            outline: none;
            box-shadow: var(--tw-ring-offset-shadow), var(--tw-ring-shadow), var(--tw-shadow, 0 0 #0000);
            --tw-border-opacity: 1;
            --tw-ring-offset-shadow: var(--tw-ring-inset) 0 0 0 var(--tw-ring-offset-width) var(--tw-ring-offset-color);
            --tw-ring-shadow: var(--tw-ring-inset) 0 0 0 calc(3px + var(--tw-ring-offset-width)) var(--tw-ring-color);
            --tw-ring-color: rgb(191 219 254 / var(--tw-ring-opacity));
            --tw-ring-opacity: .5;
        }
        .footer {
            margin-bottom: 45px;
            margin-top: 35px;
            text-align: center;
            border-bottom: 1px solid #e5e5e5;
        }
        .footer>p {
            font-size: .8rem;
            display: inline-block;
            padding: 0 10px;
            transform: translateY(10px);
            background: white;
        }
        .dark .footer {
            border-color: #303030;
        }
        .dark .footer>p {
            background: #0b0f19;
        }
        .prompt h4{
            margin: 1.25em 0 .25em 0;
            font-weight: bold;
            font-size: 115%;
        }
"""
79
+
80
# Build the Gradio UI: a header, an audio upload row with a "Transcribe"
# button, a textbox for the transcript, and a footer. Then launch the app.
block = gr.Blocks(css=css)


with block:
    # Header: logo, title and a short description of the model.
    gr.HTML(
        """
            <div style="text-align: center; max-width: 650px; margin: 0 auto;">
              <div
                style="
                  display: inline-flex;
                  align-items: center;
                  gap: 0.8rem;
                  font-size: 1.75rem;
                "
              >
                <svg
                  width="0.65em"
                  height="0.65em"
                  viewBox="0 0 115 115"
                  fill="none"
                  xmlns="http://www.w3.org/2000/svg"
                >
                  <rect width="23" height="23" fill="white"></rect>
                  <rect y="69" width="23" height="23" fill="white"></rect>
                  <rect x="23" width="23" height="23" fill="#AEAEAE"></rect>
                  <rect x="23" y="69" width="23" height="23" fill="#AEAEAE"></rect>
                  <rect x="46" width="23" height="23" fill="white"></rect>
                  <rect x="46" y="69" width="23" height="23" fill="white"></rect>
                  <rect x="69" width="23" height="23" fill="black"></rect>
                  <rect x="69" y="69" width="23" height="23" fill="black"></rect>
                  <rect x="92" width="23" height="23" fill="#D9D9D9"></rect>
                  <rect x="92" y="69" width="23" height="23" fill="#AEAEAE"></rect>
                  <rect x="115" y="46" width="23" height="23" fill="white"></rect>
                  <rect x="115" y="115" width="23" height="23" fill="white"></rect>
                  <rect x="115" y="69" width="23" height="23" fill="#D9D9D9"></rect>
                  <rect x="92" y="46" width="23" height="23" fill="#AEAEAE"></rect>
                  <rect x="92" y="115" width="23" height="23" fill="#AEAEAE"></rect>
                  <rect x="92" y="69" width="23" height="23" fill="white"></rect>
                  <rect x="69" y="46" width="23" height="23" fill="white"></rect>
                  <rect x="69" y="115" width="23" height="23" fill="white"></rect>
                  <rect x="69" y="69" width="23" height="23" fill="#D9D9D9"></rect>
                  <rect x="46" y="46" width="23" height="23" fill="black"></rect>
                  <rect x="46" y="115" width="23" height="23" fill="black"></rect>
                  <rect x="46" y="69" width="23" height="23" fill="black"></rect>
                  <rect x="23" y="46" width="23" height="23" fill="#D9D9D9"></rect>
                  <rect x="23" y="115" width="23" height="23" fill="#AEAEAE"></rect>
                  <rect x="23" y="69" width="23" height="23" fill="black"></rect>
                </svg>
                <h1 style="font-weight: 900; margin-bottom: 7px;">
                  Whisper Demo - Small Model, File Upload
                </h1>
              </div>
              <p style="margin-bottom: 10px; font-size: 94%">
                Whisper is a general-purpose speech recognition model. It is trained on a large dataset of diverse audio and is also a multi-task model that can perform multilingual speech recognition as well as speech translation and language identification.
              </p>
            </div>
        """
    )
    # NOTE(review): the nesting below is reconstructed; the original
    # indentation was not available. Confirm the layout renders as intended.
    with gr.Group():
        with gr.Box():
            with gr.Row().style(mobile_collapse=False, equal_height=True):
                # "upload" is the file-upload source; the previous value
                # "audio" is not one of the sources gr.Audio accepts.
                audio = gr.Audio(
                    label="Upload File",
                    show_label=False,
                    source="upload",
                    type="filepath",
                )

                btn = gr.Button("Transcribe")
        text = gr.Textbox(show_label=False)

        # The handler defined in this file is `transcription`; the previous
        # name `inference` was never defined and raised NameError here.
        btn.click(transcription, inputs=[audio], outputs=[text])

        gr.HTML('''
        <div class="footer">
                    <p>Forked from <a href="https://github.com/openai/whisper" style="text-decoration: underline;" target="_blank">OpenAI</a> - Gradio Demo by 🤗 Hugging Face
                    </p>
        </div>
        ''')

block.launch()