VenkateshRoshan committed
Commit 00e87f4 · Parent: 55d906c

App Code updated

Files changed (1):
  app.py +16 -88
app.py CHANGED
@@ -84,84 +84,6 @@
 # if __name__ == '__main__' :
 #     demo.launch()
 
-# import requests
-# import gradio as gr
-# import tempfile
-# import os
-# from transformers import pipeline
-# from huggingface_hub import InferenceClient
-# import time
-# import torch
-
-# device = "cuda" if torch.cuda.is_available() else "cpu"
-
-
-# model_id = "openai/whisper-large-v3"
-# client = InferenceClient(model_id)
-# pipe = pipeline("automatic-speech-recognition", model=model_id, device=device)
-
-# # def transcribe(inputs, task):
-# #     if inputs is None:
-# #         raise gr.Error("No audio file submitted! Please upload or record an audio file before submitting your request.")
-
-# #     text = pipe(inputs, chunk_length_s=30)["text"]
-# #     return text
-
-# def transcribe(inputs, task):
-#     start = time.time()
-#     if inputs is None:
-#         raise gr.Error("No audio file submitted! Please upload or record an audio file before submitting your request.")
-
-#     try:
-
-#         res = client.automatic_speech_recognition(inputs).text
-#         end = time.time() - start
-#         return res, end
-
-#     except Exception as e:
-#         return fr'Error: {str(e)}'
-
-
-# demo = gr.Blocks()
-
-# time_taken = gr.Textbox(label="Time taken", type="text")
-
-# mf_transcribe = gr.Interface(
-#     fn=transcribe,
-#     inputs=[
-#         gr.Audio(sources="microphone", type="filepath"),
-#         gr.Radio(["transcribe", "translate"], label="Task", value="transcribe"),
-#     ],
-#     outputs=["text", time_taken],
-#     title="Whisper Large V3: Transcribe Audio",
-#     description=(
-#         "Transcribe long-form microphone or audio inputs with the click of a button!"
-#     ),
-#     allow_flagging="never",
-# )
-
-# file_transcribe = gr.Interface(
-#     fn=transcribe,
-#     inputs=[
-#         gr.Audio(sources="upload", type="filepath", label="Audio file"),
-#         gr.Radio(["transcribe", "translate"], label="Task", value="transcribe"),
-#     ],
-#     outputs=["text", time_taken],
-#     title="Whisper Large V3: Transcribe Audio",
-#     description=(
-#         "Transcribe long-form microphone or audio inputs with the click of a button!"
-#     ),
-#     allow_flagging="never",
-# )
-
-
-
-# with demo:
-#     gr.TabbedInterface([mf_transcribe, file_transcribe], ["Microphone", "Audio file"])
-
-# if __name__ == "__main__":
-#     demo.queue().launch()
-
 import requests
 import gradio as gr
 import tempfile
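The hunks below call client and pipe without showing where they are defined; judging from the commented-out block deleted above, the live setup near the top of app.py presumably looks like the following sketch (an assumption inferred from the removed comments, not from lines visible in this diff):

# Sketch: the setup the later hunks rely on, mirrored from the commented-out block removed above.
# Assumption: the active definitions near the top of app.py match these commented ones.
import torch
from huggingface_hub import InferenceClient
from transformers import pipeline

device = "cuda" if torch.cuda.is_available() else "cpu"
model_id = "openai/whisper-large-v3"

client = InferenceClient(model_id)  # "Use API" path: hosted inference for the same checkpoint
pipe = pipeline("automatic-speech-recognition", model=model_id, device=device)  # local path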
@@ -185,9 +107,11 @@ def transcribe(inputs, task, use_api):
 
     try:
         if use_api:
+            print(f'Using API for transcription...')
             # Use InferenceClient (API) if checkbox is checked
             res = client.automatic_speech_recognition(inputs).text
         else:
+            print(f'Using local pipeline for transcription...')
             # Use local pipeline if checkbox is unchecked
             res = pipe(inputs, chunk_length_s=30)["text"]
 
@@ -197,15 +121,19 @@ def transcribe(inputs, task, use_api):
     except Exception as e:
         return fr'Error: {str(e)}', None
 
+def calculate_time_taken(start_time):
+    return time.time() - start_time
+
 demo = gr.Blocks()
 
 mf_transcribe = gr.Interface(
     fn=transcribe,
     inputs=[
         gr.Audio(sources="microphone", type="filepath"),
-        gr.Radio(["transcribe", "translate"], label="Task", value="transcribe"),
+        # gr.Radio(["transcribe", "translate"], label="Task", value="transcribe"),
+        gr.Checkbox(label="Use API", value=False)
     ],
-    outputs=["text", "text"], # Placeholder for transcribed text and time taken
+    outputs=["text",gr.Textbox(label="Time taken", type="text")], # Placeholder for transcribed text and time taken
     title="Whisper Large V3: Transcribe Audio",
     description=(
         "Transcribe long-form microphone or audio inputs with the click of a button!"
@@ -217,9 +145,10 @@ file_transcribe = gr.Interface(
     fn=transcribe,
     inputs=[
         gr.Audio(sources="upload", type="filepath", label="Audio file"),
-        gr.Radio(["transcribe", "translate"], label="Task", value="transcribe"),
+        # gr.Radio(["transcribe", "translate"], label="Task", value="transcribe"),
+        gr.Checkbox(label="Use API", value=False) # Checkbox for API usage
     ],
-    outputs=["text", "text"], # Placeholder for transcribed text and time taken
+    outputs=["text",gr.Textbox(label="Time taken", type="text")], # Placeholder for transcribed text and time taken
     title="Whisper Large V3: Transcribe Audio",
     description=(
         "Transcribe long-form microphone or audio inputs with the click of a button!"
@@ -231,12 +160,11 @@ with demo:
     with gr.Row():
         # with gr.Column():
         # Group the tabs for microphone and file-based transcriptions
-        gr.TabbedInterface([mf_transcribe, file_transcribe], ["Microphone", "Audio file"])
-
-    with gr.Column():
-        use_api_checkbox = gr.Checkbox(label="Use API", value=False) # Checkbox outside
-        time_taken = gr.Textbox(label="Time taken", type="text") # Time taken outside the interfaces
+        tab = gr.TabbedInterface([mf_transcribe, file_transcribe], ["Microphone", "Audio file"])
 
+    # with gr.Column():
+    #     use_api_checkbox = gr.Checkbox(label="Use API", value=False) # Checkbox outside
+    #     # time_taken = gr.Textbox(label="Time taken", type="text") # Time taken outside the interfaces
 
 if __name__ == "__main__":
-    demo.queue().launch()
+    demo.queue().launch()
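Taken together, these hunks replace the per-tab Task radio with a Use API checkbox, add a calculate_time_taken helper, and route the elapsed time into a Textbox output. Below is a minimal, self-contained sketch of how the updated app plausibly fits together; it is assembled only from the fragments visible in this diff, so anything outside the shown hunks is an assumption rather than the verbatim app.py (notably the transcribe signature, which the hunk context still lists as transcribe(inputs, task, use_api), and the exact return wiring).

# Sketch only: reconstructed from the diff fragments above, not the verbatim app.py.
import time

import gradio as gr
import torch
from huggingface_hub import InferenceClient
from transformers import pipeline

model_id = "openai/whisper-large-v3"
device = "cuda" if torch.cuda.is_available() else "cpu"
client = InferenceClient(model_id)  # "Use API" path
pipe = pipeline("automatic-speech-recognition", model=model_id, device=device)  # local path


def calculate_time_taken(start_time):
    # Helper added in this commit.
    return time.time() - start_time


def transcribe(inputs, use_api):
    # Assumption: the task argument is dropped so the signature matches the two
    # inputs (audio + checkbox) that each interface now passes.
    start = time.time()
    if inputs is None:
        raise gr.Error("No audio file submitted! Please upload or record an audio file before submitting your request.")
    try:
        if use_api:
            print('Using API for transcription...')
            res = client.automatic_speech_recognition(inputs).text
        else:
            print('Using local pipeline for transcription...')
            res = pipe(inputs, chunk_length_s=30)["text"]
        return res, f"{calculate_time_taken(start):.2f} s"
    except Exception as e:
        return f"Error: {e}", None


demo = gr.Blocks()

mf_transcribe = gr.Interface(
    fn=transcribe,
    inputs=[
        gr.Audio(sources="microphone", type="filepath"),
        gr.Checkbox(label="Use API", value=False),
    ],
    outputs=["text", gr.Textbox(label="Time taken")],
    title="Whisper Large V3: Transcribe Audio",
    description="Transcribe long-form microphone or audio inputs with the click of a button!",
    allow_flagging="never",
)

file_transcribe = gr.Interface(
    fn=transcribe,
    inputs=[
        gr.Audio(sources="upload", type="filepath", label="Audio file"),
        gr.Checkbox(label="Use API", value=False),
    ],
    outputs=["text", gr.Textbox(label="Time taken")],
    title="Whisper Large V3: Transcribe Audio",
    description="Transcribe long-form microphone or audio inputs with the click of a button!",
    allow_flagging="never",
)

with demo:
    with gr.Row():
        gr.TabbedInterface([mf_transcribe, file_transcribe], ["Microphone", "Audio file"])

if __name__ == "__main__":
    demo.queue().launch()

Checking Use API sends the audio file to the hosted endpoint via InferenceClient, while leaving it unchecked runs the transformers pipeline loaded in the Space itself; the second output reports how long whichever path took.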