bofenghuang committed on
Commit
45496e8
·
1 Parent(s): 8ecd0fd
Files changed (2) hide show
  1. README.md +1 -1
  2. run_demo_ct2.py +23 -26
README.md CHANGED
@@ -4,7 +4,7 @@ emoji: 🤫
4
  colorFrom: indigo
5
  colorTo: red
6
  sdk: gradio
7
- sdk_version: 3.9.1
8
  app_file: app.py
9
  pinned: false
10
  tags:
 
4
  colorFrom: indigo
5
  colorTo: red
6
  sdk: gradio
7
+ sdk_version: 4.16.0
8
  app_file: app.py
9
  pinned: false
10
  tags:
run_demo_ct2.py CHANGED
@@ -42,6 +42,8 @@ GEN_KWARGS = {
42
  # "logprob_threshold": None,
43
  # vad threshold
44
  # "no_speech_threshold": None,
 
 
45
  }
46
 
47
  logging.basicConfig(
@@ -156,23 +158,24 @@ def infer(model, filename, with_timestamps, return_df=False):
156
  return text
157
 
158
 
159
- def transcribe(microphone, file_upload, with_timestamps, model_name=DEFAULT_MODEL_NAME):
160
- warn_output = ""
161
- if (microphone is not None) and (file_upload is not None):
162
- warn_output = (
163
- "WARNING: You've uploaded an audio file and used the microphone. "
164
- "The recorded file from the microphone will be used and the uploaded audio will be discarded.\n"
165
- )
 
166
 
167
- elif (microphone is None) and (file_upload is None):
168
- return "ERROR: You have to either use the microphone or upload an audio file"
169
 
170
- file = microphone if microphone is not None else file_upload
171
 
172
  model = maybe_load_cached_pipeline(model_name)
173
- # text = model.transcribe(file, **GEN_KWARGS)["text"]
174
- # text = infer(model, file, with_timestamps)
175
- text = infer(model, file, with_timestamps, return_df=True)
176
 
177
  logger.info(f'Transcription by `{model_name}`:\n{text.to_json(orient="index", force_ascii=False, indent=2)}\n')
178
 
@@ -233,8 +236,9 @@ with gr.Blocks() as demo:
233
  """
234
  )
235
 
236
- microphone_input = gr.inputs.Audio(source="microphone", type="filepath", label="Record", optional=True)
237
- upload_input = gr.inputs.Audio(source="upload", type="filepath", label="Upload File", optional=True)
 
238
  with_timestamps_input = gr.Checkbox(label="With timestamps?")
239
 
240
  microphone_transcribe_btn = gr.Button("Transcribe Audio")
@@ -247,14 +251,11 @@ with gr.Blocks() as demo:
247
  text_output_df2 = gr.DataFrame(
248
  value=default_text_output_df,
249
  label="Transcription",
250
- row_count=(0, "dynamic"),
251
- max_rows=10,
252
  wrap=True,
253
- overflow_row_behaviour="paginate",
254
  )
255
 
256
  microphone_transcribe_btn.click(
257
- transcribe, inputs=[microphone_input, upload_input, with_timestamps_input], outputs=text_output_df2
258
  )
259
 
260
  # with gr.Tab("Transcribe YouTube"):
@@ -301,7 +302,7 @@ with gr.Blocks() as demo:
301
  """
302
  )
303
 
304
- yt_link_input = gr.inputs.Textbox(lines=1, placeholder="Paste the URL to a YouTube video here", label="YouTube URL")
305
  download_youtube_btn = gr.Button("Download Youtube video")
306
  downloaded_video_output = gr.Video(label="Video file", mirror_webcam=False)
307
  download_youtube_btn.click(download_video_from_youtube, inputs=[yt_link_input], outputs=[downloaded_video_output])
@@ -311,14 +312,10 @@ with gr.Blocks() as demo:
311
  text_output_df = gr.DataFrame(
312
  value=default_text_output_df,
313
  label="Transcription",
314
- row_count=(0, "dynamic"),
315
- max_rows=10,
316
  wrap=True,
317
- overflow_row_behaviour="paginate",
318
  )
319
 
320
  video_transcribe_btn.click(video_transcribe, inputs=[downloaded_video_output, with_timestamps_input3], outputs=[text_output_df])
321
 
322
- # demo.launch(server_name="0.0.0.0", debug=True)
323
- # demo.launch(server_name="0.0.0.0", debug=True, share=True)
324
- demo.launch(enable_queue=True)
 
42
  # "logprob_threshold": None,
43
  # vad threshold
44
  # "no_speech_threshold": None,
45
+ # "condition_on_previous_text": False, # todo: only for distilled version
46
+ "vad_filter": True,
47
  }
48
 
49
  logging.basicConfig(
 
158
  return text
159
 
160
 
161
+ # def transcribe(microphone, file_upload, with_timestamps, model_name=DEFAULT_MODEL_NAME):
162
+ def transcribe(audio_file_path, with_timestamps, model_name=DEFAULT_MODEL_NAME):
163
+ # warn_output = ""
164
+ # if (microphone is not None) and (file_upload is not None):
165
+ # warn_output = (
166
+ # "WARNING: You've uploaded an audio file and used the microphone. "
167
+ # "The recorded file from the microphone will be used and the uploaded audio will be discarded.\n"
168
+ # )
169
 
170
+ # elif (microphone is None) and (file_upload is None):
171
+ # return "ERROR: You have to either use the microphone or upload an audio file"
172
 
173
+ # audio_file_path = microphone if microphone is not None else file_upload
174
 
175
  model = maybe_load_cached_pipeline(model_name)
176
+ # text = model.transcribe(audio_file_path, **GEN_KWARGS)["text"]
177
+ # text = infer(model, audio_file_path, with_timestamps)
178
+ text = infer(model, audio_file_path, with_timestamps, return_df=True)
179
 
180
  logger.info(f'Transcription by `{model_name}`:\n{text.to_json(orient="index", force_ascii=False, indent=2)}\n')
181
 
 
236
  """
237
  )
238
 
239
+ # microphone_input = gr.Audio(sources="microphone", type="filepath", label="Record")
240
+ # upload_input = gr.Audio(sources="upload", type="filepath", label="Upload File")
241
+ audio_file_path = gr.Audio(sources=["microphone", "upload"], type="filepath", label="Record or upload file")
242
  with_timestamps_input = gr.Checkbox(label="With timestamps?")
243
 
244
  microphone_transcribe_btn = gr.Button("Transcribe Audio")
 
251
  text_output_df2 = gr.DataFrame(
252
  value=default_text_output_df,
253
  label="Transcription",
 
 
254
  wrap=True,
 
255
  )
256
 
257
  microphone_transcribe_btn.click(
258
+ transcribe, inputs=[audio_file_path, with_timestamps_input], outputs=text_output_df2
259
  )
260
 
261
  # with gr.Tab("Transcribe YouTube"):
 
302
  """
303
  )
304
 
305
+ yt_link_input = gr.Textbox(lines=1, placeholder="Paste the URL to a YouTube video here", label="YouTube URL")
306
  download_youtube_btn = gr.Button("Download Youtube video")
307
  downloaded_video_output = gr.Video(label="Video file", mirror_webcam=False)
308
  download_youtube_btn.click(download_video_from_youtube, inputs=[yt_link_input], outputs=[downloaded_video_output])
 
312
  text_output_df = gr.DataFrame(
313
  value=default_text_output_df,
314
  label="Transcription",
 
 
315
  wrap=True,
 
316
  )
317
 
318
  video_transcribe_btn.click(video_transcribe, inputs=[downloaded_video_output, with_timestamps_input3], outputs=[text_output_df])
319
 
320
+ # demo.queue(max_size=10).launch(server_name="0.0.0.0", debug=True, ssl_certfile="/home/bhuang/tools/cert.pem", ssl_keyfile="/home/bhuang/tools/key.pem", ssl_verify=False)
321
+ demo.queue(max_size=10).launch()