transcribe_audio

Running

App Files Files Community

cstr committed on Oct 2, 2024

Commit

d5bbd76

verified ·

1 Parent(s): 1c4f98a

gr.blocks

Browse files

Files changed (1) hide show

app.py +42 -32

app.py CHANGED Viewed

@@ -260,37 +260,47 @@ def transcribe_audio(input_source, pipeline_type, model_id, dtype, batch_size, d
             except:
                 pass
-iface = gr.Interface(
-    fn=transcribe_audio,
-    inputs=[
-        gr.Textbox(label="Audio Source (Upload, URL, or YouTube URL)"),
-        gr.Dropdown(choices=["faster-batched", "faster-sequenced", "transformers"], label="Pipeline Type", value="faster-batched"),
-        gr.Dropdown(label="Model", choices=get_model_options("faster-batched"), value=get_model_options("faster-batched")[0]),
-        gr.Dropdown(choices=["int8", "float16", "float32"], label="Data Type", value="int8"),
-        gr.Slider(minimum=1, maximum=32, step=1, value=16, label="Batch Size"),
-        gr.Dropdown(choices=["yt-dlp", "pytube", "youtube-dl", "yt-dlp-alt", "ffmpeg", "aria2", "wget"], label="Download Method", value="yt-dlp"),
-        gr.Number(label="Start Time (seconds)", value=0),
-        gr.Number(label="End Time (seconds)", value=0),
-        gr.Checkbox(label="Verbose Output", value=False)
-    ],
-    outputs=[
-        gr.Textbox(label="Transcription Metrics and Verbose Messages", lines=10),
-        gr.Textbox(label="Transcription", lines=10),
-        gr.File(label="Download Transcription")
-    ],
-    title="Multi-Pipeline Transcription",
-    description="Transcribe audio using multiple pipelines and models.",
-    examples=[
-        ["https://www.youtube.com/watch?v=daQ_hqA6HDo", "faster-batched", "cstr/whisper-large-v3-turbo-int8_float32", "int8", 16, "yt-dlp", 0, None, False],
-        ["https://mcdn.podbean.com/mf/web/dir5wty678b6g4vg/HoP_453_-_The_Price_is_Right_-_Law_and_Economics_in_the_Second_Scholastic5yxzh.mp3", "faster-sequenced", "deepdml/faster-whisper-large-v3-turbo-ct2", "float16", 1, "ffmpeg", 0, 300, True],
-        ["path/to/local/audio.mp3", "transformers", "openai/whisper-large-v3", "float16", 16, "yt-dlp", 60, 180, False]
-    ],
-    cache_examples=False,
-)
-pipeline_type_dropdown = iface.inputs[1]
-model_dropdown = iface.inputs[2]
-pipeline_type_dropdown.change(update_model_dropdown, inputs=[pipeline_type_dropdown], outputs=[model_dropdown])
 iface.launch()

             except:
                 pass
+with gr.Blocks() as iface:
+    gr.Markdown("# Multi-Pipeline Transcription")
+    gr.Markdown("Transcribe audio using multiple pipelines and models.")
+    with gr.Row():
+        input_source = gr.Textbox(label="Audio Source (Upload, URL, or YouTube URL)")
+        pipeline_type = gr.Dropdown(choices=["faster-batched", "faster-sequenced", "transformers"], label="Pipeline Type", value="faster-batched")
+        model_id = gr.Dropdown(label="Model", choices=get_model_options("faster-batched"), value=get_model_options("faster-batched")[0])
+    with gr.Row():
+        dtype = gr.Dropdown(choices=["int8", "float16", "float32"], label="Data Type", value="int8")
+        batch_size = gr.Slider(minimum=1, maximum=32, step=1, value=16, label="Batch Size")
+        download_method = gr.Dropdown(choices=["yt-dlp", "pytube", "youtube-dl", "yt-dlp-alt", "ffmpeg", "aria2", "wget"], label="Download Method", value="yt-dlp")
+    with gr.Row():
+        start_time = gr.Number(label="Start Time (seconds)", value=0)
+        end_time = gr.Number(label="End Time (seconds)", value=0)
+        verbose = gr.Checkbox(label="Verbose Output", value=False)
+    transcribe_button = gr.Button("Transcribe")
+    with gr.Row():
+        metrics_output = gr.Textbox(label="Transcription Metrics and Verbose Messages", lines=10)
+        transcription_output = gr.Textbox(label="Transcription", lines=10)
+        transcription_file = gr.File(label="Download Transcription")
+    pipeline_type.change(update_model_dropdown, inputs=[pipeline_type], outputs=[model_id])
+    transcribe_button.click(
+        transcribe_audio,
+        inputs=[input_source, pipeline_type, model_id, dtype, batch_size, download_method, start_time, end_time, verbose],
+        outputs=[metrics_output, transcription_output, transcription_file]
+    )
+    gr.Examples(
+        examples=[
+            ["https://www.youtube.com/watch?v=daQ_hqA6HDo", "faster-batched", "cstr/whisper-large-v3-turbo-int8_float32", "int8", 16, "yt-dlp", 0, None, False],
+            ["https://mcdn.podbean.com/mf/web/dir5wty678b6g4vg/HoP_453_-_The_Price_is_Right_-_Law_and_Economics_in_the_Second_Scholastic5yxzh.mp3", "faster-sequenced", "deepdml/faster-whisper-large-v3-turbo-ct2", "float16", 1, "ffmpeg", 0, 300, True],
+            ["path/to/local/audio.mp3", "transformers", "openai/whisper-large-v3", "float16", 16, "yt-dlp", 60, 180, False]
+        ],
+        inputs=[input_source, pipeline_type, model_id, dtype, batch_size, download_method, start_time, end_time, verbose],
+    )
 iface.launch()