cstr committed
Commit
1c4f98a
1 Parent(s): d680d0f
Files changed (1)
  1. app.py +13 -17
app.py CHANGED
```diff
@@ -157,7 +157,6 @@ def save_transcription(transcription):
         f.write(transcription)
     return file_path
 
-
 def get_model_options(pipeline_type):
     if pipeline_type == "faster-batched":
         return ["cstr/whisper-large-v3-turbo-int8_float32", "deepdml/faster-whisper-large-v3-turbo-ct2", "Systran/faster-whisper-large-v3", "GalaktischeGurke/primeline-whisper-large-v3-german-ct2"]
```
```diff
@@ -212,15 +211,13 @@ def transcribe_audio(input_source, pipeline_type, model_id, dtype, batch_size, d
         trimmed_audio_path = trim_audio(audio_path, start_time or 0, end_time)
         audio_path = trimmed_audio_path
 
-    if model_choice == "faster-whisper":
-        start_time_perf = time.time()
-        segments, info = batched_model.transcribe(audio_path, batch_size=batch_size, initial_prompt=None)
-        end_time_perf = time.time()
+    start_time_perf = time.time()
+    if pipeline_type in ["faster-batched", "faster-sequenced"]:
+        segments, info = pipeline(audio_path, batch_size=batch_size)
     else:
-        start_time_perf = time.time()
-        result = pipe(audio_path)
+        result = pipeline(audio_path)
         segments = result["chunks"]
-        end_time_perf = time.time()
+    end_time_perf = time.time()
 
     transcription_time = end_time_perf - start_time_perf
     audio_file_size = os.path.getsize(audio_path) / (1024 * 1024)
```
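For context on this hunk: the two branches call differently shaped backends, which is why only the timing was unified. Below is a minimal, self-contained sketch of both call shapes, assuming recent faster-whisper and transformers releases; the model IDs, device, and compute type are illustrative, not values from app.py. One caveat worth knowing: faster-whisper's transcribe() returns a lazy generator, so a timing wrapper only measures real work if the segments are consumed inside it.

```python
# Sketch only: the two call shapes behind the unified timing block.
# Model IDs, device, and compute_type are illustrative assumptions;
# BatchedInferencePipeline requires a recent faster-whisper release.
import time
from faster_whisper import WhisperModel, BatchedInferencePipeline
from transformers import pipeline as hf_pipeline

def timed_transcribe(pipeline_type, audio_path, batch_size=16):
    if pipeline_type in ["faster-batched", "faster-sequenced"]:
        model = WhisperModel("Systran/faster-whisper-large-v3",
                             device="auto", compute_type="int8_float32")
        pipe = BatchedInferencePipeline(model=model)
        start = time.time()
        segments, info = pipe.transcribe(audio_path, batch_size=batch_size)
        segments = list(segments)  # transcribe() is lazy; consume it so the timing is real
    else:
        pipe = hf_pipeline("automatic-speech-recognition",
                           model="openai/whisper-large-v3")
        start = time.time()
        result = pipe(audio_path, return_timestamps=True)
        segments = result["chunks"]  # list of {"timestamp": (start, end), "text": ...}
    return segments, time.time() - start
```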
```diff
@@ -236,12 +233,12 @@ def transcribe_audio(input_source, pipeline_type, model_id, dtype, batch_size, d
     transcription = ""
 
     for segment in segments:
-        if model_choice == "faster-whisper":
-            transcription_segment = f"[{segment.start:.2f}s -> {segment.end:.2f}s] {segment.text}\n"
-        else:
-            transcription_segment = f"[{segment['timestamp'][0]:.2f}s -> {segment['timestamp'][1]:.2f}s] {segment['text']}\n"
+        transcription_segment = (
+            f"[{segment.start:.2f}s -> {segment.end:.2f}s] {segment.text}\n"
+            if pipeline_type in ["faster-batched", "faster-sequenced"] else
+            f"[{segment['timestamp'][0]:.2f}s -> {segment['timestamp'][1]:.2f}s] {segment['text']}\n"
+        )
         transcription += transcription_segment
-
         if verbose:
             yield metrics_output, transcription, None
```
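The conditional expression works because the two backends emit differently shaped segment records. A hypothetical normalizing helper, shown only to make the two shapes explicit (format_segment is not part of app.py):

```python
def format_segment(seg):
    """Hypothetical helper: normalize both backends' segment records to one line.

    faster-whisper yields Segment objects (seg.start, seg.end, seg.text);
    the transformers pipeline yields dicts like {"timestamp": (0.0, 3.2), "text": "..."}.
    """
    if hasattr(seg, "start"):  # faster-whisper Segment
        start, end, text = seg.start, seg.end, seg.text
    else:                      # transformers chunk dict
        (start, end), text = seg["timestamp"], seg["text"]
    return f"[{start:.2f}s -> {end:.2f}s] {text}\n"
```

One edge case real code may want to guard: transformers can report None as a chunk's end timestamp when the audio is cut mid-segment, which would make the `:.2f` format raise.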
 
```diff
@@ -289,12 +286,11 @@ iface = gr.Interface(
         ["path/to/local/audio.mp3", "transformers", "openai/whisper-large-v3", "float16", 16, "yt-dlp", 60, 180, False]
     ],
     cache_examples=False,
-    live=True
 )
 
-iface.launch()
-
 pipeline_type_dropdown = iface.inputs[1]
 model_dropdown = iface.inputs[2]
 
-pipeline_type_dropdown.change(update_model_dropdown, inputs=[pipeline_type_dropdown], outputs=[model_dropdown])
+pipeline_type_dropdown.change(update_model_dropdown, inputs=[pipeline_type_dropdown], outputs=[model_dropdown])
+
+iface.launch()
```
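The reordering in this hunk matters: the dropdown's .change() handler is now registered before iface.launch(), so the app starts with the wiring in place, and dropping live=True means the interface no longer re-runs on every input change. Reaching into iface.inputs is version-sensitive in Gradio; as a point of comparison only, here is a sketch of the same wiring written with gr.Blocks (labels and the default choice are assumptions, and gr.update ties this to Gradio 3.x/4.x behavior):

```python
import gradio as gr

def update_model_dropdown(pipeline_type):
    # get_model_options is the helper defined earlier in app.py
    return gr.update(choices=get_model_options(pipeline_type))

with gr.Blocks() as demo:
    pipeline_type = gr.Dropdown(
        ["faster-batched", "faster-sequenced", "transformers"],
        value="faster-batched", label="Pipeline type")
    model_id = gr.Dropdown(
        get_model_options("faster-batched"), label="Model")
    pipeline_type.change(update_model_dropdown,
                         inputs=pipeline_type, outputs=model_id)

demo.launch()
```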
 
 
 