cstr committed on
Commit
d5bbd76
1 Parent(s): 1c4f98a
Files changed (1) hide show
  1. app.py +42 -32
app.py CHANGED
@@ -260,37 +260,47 @@ def transcribe_audio(input_source, pipeline_type, model_id, dtype, batch_size, d
260
  except:
261
  pass
262
 
263
- iface = gr.Interface(
264
- fn=transcribe_audio,
265
- inputs=[
266
- gr.Textbox(label="Audio Source (Upload, URL, or YouTube URL)"),
267
- gr.Dropdown(choices=["faster-batched", "faster-sequenced", "transformers"], label="Pipeline Type", value="faster-batched"),
268
- gr.Dropdown(label="Model", choices=get_model_options("faster-batched"), value=get_model_options("faster-batched")[0]),
269
- gr.Dropdown(choices=["int8", "float16", "float32"], label="Data Type", value="int8"),
270
- gr.Slider(minimum=1, maximum=32, step=1, value=16, label="Batch Size"),
271
- gr.Dropdown(choices=["yt-dlp", "pytube", "youtube-dl", "yt-dlp-alt", "ffmpeg", "aria2", "wget"], label="Download Method", value="yt-dlp"),
272
- gr.Number(label="Start Time (seconds)", value=0),
273
- gr.Number(label="End Time (seconds)", value=0),
274
- gr.Checkbox(label="Verbose Output", value=False)
275
- ],
276
- outputs=[
277
- gr.Textbox(label="Transcription Metrics and Verbose Messages", lines=10),
278
- gr.Textbox(label="Transcription", lines=10),
279
- gr.File(label="Download Transcription")
280
- ],
281
- title="Multi-Pipeline Transcription",
282
- description="Transcribe audio using multiple pipelines and models.",
283
- examples=[
284
- ["https://www.youtube.com/watch?v=daQ_hqA6HDo", "faster-batched", "cstr/whisper-large-v3-turbo-int8_float32", "int8", 16, "yt-dlp", 0, None, False],
285
- ["https://mcdn.podbean.com/mf/web/dir5wty678b6g4vg/HoP_453_-_The_Price_is_Right_-_Law_and_Economics_in_the_Second_Scholastic5yxzh.mp3", "faster-sequenced", "deepdml/faster-whisper-large-v3-turbo-ct2", "float16", 1, "ffmpeg", 0, 300, True],
286
- ["path/to/local/audio.mp3", "transformers", "openai/whisper-large-v3", "float16", 16, "yt-dlp", 60, 180, False]
287
- ],
288
- cache_examples=False,
289
- )
290
-
291
- pipeline_type_dropdown = iface.inputs[1]
292
- model_dropdown = iface.inputs[2]
293
-
294
- pipeline_type_dropdown.change(update_model_dropdown, inputs=[pipeline_type_dropdown], outputs=[model_dropdown])
 
 
 
 
 
 
 
 
 
 
295
 
296
  iface.launch()
 
260
  except:
261
  pass
262
 
263
+ with gr.Blocks() as iface:
264
+ gr.Markdown("# Multi-Pipeline Transcription")
265
+ gr.Markdown("Transcribe audio using multiple pipelines and models.")
266
+
267
+ with gr.Row():
268
+ input_source = gr.Textbox(label="Audio Source (Upload, URL, or YouTube URL)")
269
+ pipeline_type = gr.Dropdown(choices=["faster-batched", "faster-sequenced", "transformers"], label="Pipeline Type", value="faster-batched")
270
+ model_id = gr.Dropdown(label="Model", choices=get_model_options("faster-batched"), value=get_model_options("faster-batched")[0])
271
+
272
+ with gr.Row():
273
+ dtype = gr.Dropdown(choices=["int8", "float16", "float32"], label="Data Type", value="int8")
274
+ batch_size = gr.Slider(minimum=1, maximum=32, step=1, value=16, label="Batch Size")
275
+ download_method = gr.Dropdown(choices=["yt-dlp", "pytube", "youtube-dl", "yt-dlp-alt", "ffmpeg", "aria2", "wget"], label="Download Method", value="yt-dlp")
276
+
277
+ with gr.Row():
278
+ start_time = gr.Number(label="Start Time (seconds)", value=0)
279
+ end_time = gr.Number(label="End Time (seconds)", value=0)
280
+ verbose = gr.Checkbox(label="Verbose Output", value=False)
281
+
282
+ transcribe_button = gr.Button("Transcribe")
283
+
284
+ with gr.Row():
285
+ metrics_output = gr.Textbox(label="Transcription Metrics and Verbose Messages", lines=10)
286
+ transcription_output = gr.Textbox(label="Transcription", lines=10)
287
+ transcription_file = gr.File(label="Download Transcription")
288
+
289
+ pipeline_type.change(update_model_dropdown, inputs=[pipeline_type], outputs=[model_id])
290
+
291
+ transcribe_button.click(
292
+ transcribe_audio,
293
+ inputs=[input_source, pipeline_type, model_id, dtype, batch_size, download_method, start_time, end_time, verbose],
294
+ outputs=[metrics_output, transcription_output, transcription_file]
295
+ )
296
+
297
+ gr.Examples(
298
+ examples=[
299
+ ["https://www.youtube.com/watch?v=daQ_hqA6HDo", "faster-batched", "cstr/whisper-large-v3-turbo-int8_float32", "int8", 16, "yt-dlp", 0, None, False],
300
+ ["https://mcdn.podbean.com/mf/web/dir5wty678b6g4vg/HoP_453_-_The_Price_is_Right_-_Law_and_Economics_in_the_Second_Scholastic5yxzh.mp3", "faster-sequenced", "deepdml/faster-whisper-large-v3-turbo-ct2", "float16", 1, "ffmpeg", 0, 300, True],
301
+ ["path/to/local/audio.mp3", "transformers", "openai/whisper-large-v3", "float16", 16, "yt-dlp", 60, 180, False]
302
+ ],
303
+ inputs=[input_source, pipeline_type, model_id, dtype, batch_size, download_method, start_time, end_time, verbose],
304
+ )
305
 
306
  iface.launch()