VenkateshRoshan committed
Commit 55d906c · 1 Parent(s): 5abb6bc

App Code updated

Files changed (1)
  1. app.py +130 -40
app.py CHANGED
@@ -84,69 +84,159 @@
  # if __name__ == '__main__' :
  #     demo.launch()

  import requests
  import gradio as gr
  import tempfile
  import os
  from transformers import pipeline
  from huggingface_hub import InferenceClient

  model_id = "openai/whisper-large-v3"
  client = InferenceClient(model_id)
- pipe = pipeline("automatic-speech-recognition", model=model_id)

- # def transcribe(inputs, task):
- #     if inputs is None:
- #         raise gr.Error("No audio file submitted! Please upload or record an audio file before submitting your request.")
-
- #     text = pipe(inputs, chunk_length_s=30)["text"]
- #     return text
-
- def transcribe(inputs, task):
      if inputs is None:
          raise gr.Error("No audio file submitted! Please upload or record an audio file before submitting your request.")

      try:
-
-         res = client.automatic_speech_recognition(inputs).text
-         return res

      except Exception as e:
-         return fr'Error: {str(e)}'
-

  demo = gr.Blocks()

  mf_transcribe = gr.Interface(
-     fn=transcribe,
-     inputs=[
-         gr.Audio(sources="microphone", type="filepath"),
-         gr.Radio(["transcribe", "translate"], label="Task", value="transcribe"),
-     ],
-     outputs="text",
-     title="Whisper Large V3: Transcribe Audio",
-     description=(
-         "Transcribe long-form microphone or audio inputs with the click of a button!"
-     ),
-     allow_flagging="never",
- )

  file_transcribe = gr.Interface(
-     fn=transcribe,
-     inputs=[
-         gr.Audio(sources="upload", type="filepath", label="Audio file"),
-         gr.Radio(["transcribe", "translate"], label="Task", value="transcribe"),
-     ],
-     outputs="text",
-     title="Whisper Large V3: Transcribe Audio",
-     description=(
-         "Transcribe long-form microphone or audio inputs with the click of a button!"
-     ),
-     allow_flagging="never",
- )

  with demo:
-     gr.TabbedInterface([mf_transcribe, file_transcribe], ["Microphone", "Audio file"])

  if __name__ == "__main__":
-     demo.queue().launch()
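For reference, the removed transcribe above simply forwards the audio file path to the hosted Whisper endpoint and returns the recognised text. A minimal standalone sketch of that call outside Gradio (the sample file path is a placeholder, not part of the commit):

from huggingface_hub import InferenceClient

client = InferenceClient("openai/whisper-large-v3")

# "sample.flac" is a hypothetical local audio file; any path the
# Inference API accepts works here.
result = client.automatic_speech_recognition("sample.flac")
print(result.text)  # the transcription string the app returns to the UI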
 
  # if __name__ == '__main__' :
  #     demo.launch()

+ # import requests
+ # import gradio as gr
+ # import tempfile
+ # import os
+ # from transformers import pipeline
+ # from huggingface_hub import InferenceClient
+ # import time
+ # import torch
+
+ # device = "cuda" if torch.cuda.is_available() else "cpu"
+
+
+ # model_id = "openai/whisper-large-v3"
+ # client = InferenceClient(model_id)
+ # pipe = pipeline("automatic-speech-recognition", model=model_id, device=device)
+
+ # # def transcribe(inputs, task):
+ # #     if inputs is None:
+ # #         raise gr.Error("No audio file submitted! Please upload or record an audio file before submitting your request.")
+
+ # #     text = pipe(inputs, chunk_length_s=30)["text"]
+ # #     return text
+
+ # def transcribe(inputs, task):
+ #     start = time.time()
+ #     if inputs is None:
+ #         raise gr.Error("No audio file submitted! Please upload or record an audio file before submitting your request.")
+
+ #     try:
+
+ #         res = client.automatic_speech_recognition(inputs).text
+ #         end = time.time() - start
+ #         return res, end
+
+ #     except Exception as e:
+ #         return fr'Error: {str(e)}'
+
+
+ # demo = gr.Blocks()
+
+ # time_taken = gr.Textbox(label="Time taken", type="text")
+
+ # mf_transcribe = gr.Interface(
+ #     fn=transcribe,
+ #     inputs=[
+ #         gr.Audio(sources="microphone", type="filepath"),
+ #         gr.Radio(["transcribe", "translate"], label="Task", value="transcribe"),
+ #     ],
+ #     outputs=["text", time_taken],
+ #     title="Whisper Large V3: Transcribe Audio",
+ #     description=(
+ #         "Transcribe long-form microphone or audio inputs with the click of a button!"
+ #     ),
+ #     allow_flagging="never",
+ # )
+
+ # file_transcribe = gr.Interface(
+ #     fn=transcribe,
+ #     inputs=[
+ #         gr.Audio(sources="upload", type="filepath", label="Audio file"),
+ #         gr.Radio(["transcribe", "translate"], label="Task", value="transcribe"),
+ #     ],
+ #     outputs=["text", time_taken],
+ #     title="Whisper Large V3: Transcribe Audio",
+ #     description=(
+ #         "Transcribe long-form microphone or audio inputs with the click of a button!"
+ #     ),
+ #     allow_flagging="never",
+ # )
+
+
+
+ # with demo:
+ #     gr.TabbedInterface([mf_transcribe, file_transcribe], ["Microphone", "Audio file"])
+
+ # if __name__ == "__main__":
+ #     demo.queue().launch()
+
  import requests
  import gradio as gr
  import tempfile
  import os
  from transformers import pipeline
  from huggingface_hub import InferenceClient
+ import time
+ import torch
+
+ # Ensure CUDA is available and set device accordingly
+ # device = 0 if torch.cuda.is_available() else -1

  model_id = "openai/whisper-large-v3"
  client = InferenceClient(model_id)
+ pipe = pipeline("automatic-speech-recognition", model=model_id)  #, device=device)

+ def transcribe(inputs, task, use_api):
+     start = time.time()
      if inputs is None:
          raise gr.Error("No audio file submitted! Please upload or record an audio file before submitting your request.")

      try:
+         if use_api:
+             # Use InferenceClient (API) if checkbox is checked
+             res = client.automatic_speech_recognition(inputs).text
+         else:
+             # Use local pipeline if checkbox is unchecked
+             res = pipe(inputs, chunk_length_s=30)["text"]
+
+         end = time.time() - start
+         return res, end

      except Exception as e:
+         return fr'Error: {str(e)}', None

  demo = gr.Blocks()

  mf_transcribe = gr.Interface(
+     fn=transcribe,
+     inputs=[
+         gr.Audio(sources="microphone", type="filepath"),
+         gr.Radio(["transcribe", "translate"], label="Task", value="transcribe"),
+     ],
+     outputs=["text", "text"],  # Placeholder for transcribed text and time taken
+     title="Whisper Large V3: Transcribe Audio",
+     description=(
+         "Transcribe long-form microphone or audio inputs with the click of a button!"
+     ),
+     allow_flagging="never",
+ )

  file_transcribe = gr.Interface(
+     fn=transcribe,
+     inputs=[
+         gr.Audio(sources="upload", type="filepath", label="Audio file"),
+         gr.Radio(["transcribe", "translate"], label="Task", value="transcribe"),
+     ],
+     outputs=["text", "text"],  # Placeholder for transcribed text and time taken
+     title="Whisper Large V3: Transcribe Audio",
+     description=(
+         "Transcribe long-form microphone or audio inputs with the click of a button!"
+     ),
+     allow_flagging="never",
+ )

  with demo:
+     with gr.Row():
+         # with gr.Column():
+         # Group the tabs for microphone and file-based transcriptions
+         gr.TabbedInterface([mf_transcribe, file_transcribe], ["Microphone", "Audio file"])
+
+     with gr.Column():
+         use_api_checkbox = gr.Checkbox(label="Use API", value=False)  # Checkbox outside
+         time_taken = gr.Textbox(label="Time taken", type="text")  # Time taken outside the interfaces
+

  if __name__ == "__main__":
+     demo.queue().launch()
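Note that the new transcribe(inputs, task, use_api) expects three arguments and returns two values, while both gr.Interface definitions above still declare only two inputs, and use_api_checkbox / time_taken are created inside the Blocks layout without being connected to either interface. A minimal sketch of one way those components could be wired in (an illustration under that assumption, not what this commit does):

mf_transcribe = gr.Interface(
    fn=transcribe,
    inputs=[
        gr.Audio(sources="microphone", type="filepath"),
        gr.Radio(["transcribe", "translate"], label="Task", value="transcribe"),
        gr.Checkbox(label="Use API", value=False),  # supplies the use_api argument
    ],
    outputs=[
        gr.Textbox(label="Transcription"),
        gr.Textbox(label="Time taken (s)"),  # receives the elapsed time returned by transcribe
    ],
    title="Whisper Large V3: Transcribe Audio",
    allow_flagging="never",
)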