cstr committed
Commit
1c4f98a
1 Parent(s): d680d0f
Files changed (1)
  1. app.py +13 -17
app.py CHANGED
```diff
@@ -157,7 +157,6 @@ def save_transcription(transcription):
         f.write(transcription)
     return file_path
 
-
 def get_model_options(pipeline_type):
     if pipeline_type == "faster-batched":
         return ["cstr/whisper-large-v3-turbo-int8_float32", "deepdml/faster-whisper-large-v3-turbo-ct2", "Systran/faster-whisper-large-v3", "GalaktischeGurke/primeline-whisper-large-v3-german-ct2"]
```
```diff
@@ -212,15 +211,13 @@ def transcribe_audio(input_source, pipeline_type, model_id, dtype, batch_size, d
         trimmed_audio_path = trim_audio(audio_path, start_time or 0, end_time)
         audio_path = trimmed_audio_path
 
-    if model_choice == "faster-whisper":
-        start_time_perf = time.time()
-        segments, info = batched_model.transcribe(audio_path, batch_size=batch_size, initial_prompt=None)
-        end_time_perf = time.time()
+    start_time_perf = time.time()
+    if pipeline_type in ["faster-batched", "faster-sequenced"]:
+        segments, info = pipeline(audio_path, batch_size=batch_size)
     else:
-        start_time_perf = time.time()
-        result = pipe(audio_path)
+        result = pipeline(audio_path)
         segments = result["chunks"]
-        end_time_perf = time.time()
+    end_time_perf = time.time()
 
     transcription_time = end_time_perf - start_time_perf
     audio_file_size = os.path.getsize(audio_path) / (1024 * 1024)
```
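For context on this hunk: the two branches call differently shaped backends, which is why only the timing was unified. Below is a minimal, self-contained sketch of both call shapes, assuming recent faster-whisper and transformers releases; the model IDs, device, and compute type are illustrative, not values from app.py. One caveat worth knowing: faster-whisper's transcribe() returns a lazy generator, so a timing wrapper only measures real work if the segments are consumed inside it.

```python
# Sketch only: the two call shapes behind the unified timing block.
# Model IDs, device, and compute_type are illustrative assumptions;
# BatchedInferencePipeline requires a recent faster-whisper release.
import time
from faster_whisper import WhisperModel, BatchedInferencePipeline
from transformers import pipeline as hf_pipeline

def timed_transcribe(pipeline_type, audio_path, batch_size=16):
    if pipeline_type in ["faster-batched", "faster-sequenced"]:
        model = WhisperModel("Systran/faster-whisper-large-v3",
                             device="auto", compute_type="int8_float32")
        pipe = BatchedInferencePipeline(model=model)
        start = time.time()
        segments, info = pipe.transcribe(audio_path, batch_size=batch_size)
        segments = list(segments)  # transcribe() is lazy; consume it so the timing is real
    else:
        pipe = hf_pipeline("automatic-speech-recognition",
                           model="openai/whisper-large-v3")
        start = time.time()
        result = pipe(audio_path, return_timestamps=True)
        segments = result["chunks"]  # list of {"timestamp": (start, end), "text": ...}
    return segments, time.time() - start
```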
```diff
@@ -236,12 +233,12 @@ def transcribe_audio(input_source, pipeline_type, model_id, dtype, batch_size, d
     transcription = ""
 
     for segment in segments:
-        if model_choice == "faster-whisper":
-            transcription_segment = f"[{segment.start:.2f}s -> {segment.end:.2f}s] {segment.text}\n"
-        else:
-            transcription_segment = f"[{segment['timestamp'][0]:.2f}s -> {segment['timestamp'][1]:.2f}s] {segment['text']}\n"
+        transcription_segment = (
+            f"[{segment.start:.2f}s -> {segment.end:.2f}s] {segment.text}\n"
+            if pipeline_type in ["faster-batched", "faster-sequenced"] else
+            f"[{segment['timestamp'][0]:.2f}s -> {segment['timestamp'][1]:.2f}s] {segment['text']}\n"
+        )
         transcription += transcription_segment
-
         if verbose:
             yield metrics_output, transcription, None
```
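The conditional expression works because the two backends emit differently shaped segment records. A hypothetical normalizing helper, shown only to make the two shapes explicit (format_segment is not part of app.py):

```python
def format_segment(seg):
    """Hypothetical helper: normalize both backends' segment records to one line.

    faster-whisper yields Segment objects (seg.start, seg.end, seg.text);
    the transformers pipeline yields dicts like {"timestamp": (0.0, 3.2), "text": "..."}.
    """
    if hasattr(seg, "start"):  # faster-whisper Segment
        start, end, text = seg.start, seg.end, seg.text
    else:                      # transformers chunk dict
        (start, end), text = seg["timestamp"], seg["text"]
    return f"[{start:.2f}s -> {end:.2f}s] {text}\n"
```

One edge case real code may want to guard: transformers can report None as a chunk's end timestamp when the audio is cut mid-segment, which would make the `:.2f` format raise.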
 
```diff
@@ -289,12 +286,11 @@ iface = gr.Interface(
         ["path/to/local/audio.mp3", "transformers", "openai/whisper-large-v3", "float16", 16, "yt-dlp", 60, 180, False]
     ],
     cache_examples=False,
-    live=True
 )
 
-iface.launch()
-
 pipeline_type_dropdown = iface.inputs[1]
 model_dropdown = iface.inputs[2]
 
-pipeline_type_dropdown.change(update_model_dropdown, inputs=[pipeline_type_dropdown], outputs=[model_dropdown])
+pipeline_type_dropdown.change(update_model_dropdown, inputs=[pipeline_type_dropdown], outputs=[model_dropdown])
+
+iface.launch()
```
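The reordering in this hunk matters: the dropdown's .change() handler is now registered before iface.launch(), so the app starts with the wiring in place, and dropping live=True means the interface no longer re-runs on every input change. Reaching into iface.inputs is version-sensitive in Gradio; as a point of comparison only, here is a sketch of the same wiring written with gr.Blocks (labels and the default choice are assumptions, and gr.update ties this to Gradio 3.x/4.x behavior):

```python
import gradio as gr

def update_model_dropdown(pipeline_type):
    # get_model_options is the helper defined earlier in app.py
    return gr.update(choices=get_model_options(pipeline_type))

with gr.Blocks() as demo:
    pipeline_type = gr.Dropdown(
        ["faster-batched", "faster-sequenced", "transformers"],
        value="faster-batched", label="Pipeline type")
    model_id = gr.Dropdown(
        get_model_options("faster-batched"), label="Model")
    pipeline_type.change(update_model_dropdown,
                         inputs=pipeline_type, outputs=model_id)

demo.launch()
```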
 
 
 