cstr committed on
Commit
26eb097
·
verified ·
1 Parent(s): ae4be5c

+verbosity

Browse files
Files changed (1) hide show
  1. app.py +15 -21
app.py CHANGED
@@ -15,15 +15,13 @@ import yt_dlp
15
  logging.basicConfig(level=logging.INFO)
16
 
17
  # Clone and install faster-whisper from GitHub
18
- # (we should be able to do this in build.sh in a hf space)
19
  try:
20
  subprocess.run(["git", "clone", "https://github.com/SYSTRAN/faster-whisper.git"], check=True)
21
  subprocess.run(["pip", "install", "-e", "./faster-whisper"], check=True)
22
  except subprocess.CalledProcessError as e:
23
- print(f"Error during faster-whisper installation: {e}")
24
  sys.exit(1)
25
 
26
- # Add the faster-whisper directory to the Python path
27
  sys.path.append("./faster-whisper")
28
 
29
  from faster_whisper import WhisperModel
@@ -159,16 +157,18 @@ def save_transcription(transcription):
159
 
160
  def get_model_options(pipeline_type):
161
  if pipeline_type == "faster-batched":
162
- return ["cstr/whisper-large-v3-turbo-int8_float32", "deepdml/faster-whisper-large-v3-turbo-ct2", "Systran/faster-whisper-large-v3", "GalaktischeGurke/primeline-whisper-large-v3-german-ct2"]
163
  elif pipeline_type == "faster-sequenced":
164
- return ["cstr/whisper-large-v3-turbo-int8_float32", "deepdml/faster-whisper-large-v3-turbo-ct2", "Systran/faster-whisper-large-v3", "GalaktischeGurke/primeline-whisper-large-v3-german-ct2"]
165
  elif pipeline_type == "transformers":
166
- return ["openai/whisper-large-v3", "openai/whisper-large-v3-turbo", "primeline/whisper-large-v3-german"]
167
- else:
168
- return []
169
-
170
  def transcribe_audio(input_source, pipeline_type, model_id, dtype, batch_size, download_method, start_time=None, end_time=None, verbose=False):
171
  try:
 
 
 
172
  if pipeline_type == "faster-batched":
173
  model = WhisperModel(model_id, device="auto", compute_type=dtype)
174
  pipeline = BatchedInferencePipeline(model=model)
@@ -198,6 +198,7 @@ def transcribe_audio(input_source, pipeline_type, model_id, dtype, batch_size, d
198
 
199
  if isinstance(input_source, str) and (input_source.startswith('http://') or input_source.startswith('https://')):
200
  audio_path = download_audio(input_source, download_method)
 
201
  if audio_path.startswith("Error"):
202
  yield f"Error: {audio_path}", "", None
203
  return
@@ -207,6 +208,7 @@ def transcribe_audio(input_source, pipeline_type, model_id, dtype, batch_size, d
207
  if start_time is not None or end_time is not None:
208
  trimmed_audio_path = trim_audio(audio_path, start_time or 0, end_time)
209
  audio_path = trimmed_audio_path
 
210
 
211
  start_time_perf = time.time()
212
  if pipeline_type in ["faster-batched", "faster-sequenced"]:
@@ -225,7 +227,7 @@ def transcribe_audio(input_source, pipeline_type, model_id, dtype, batch_size, d
225
  )
226
 
227
  if verbose:
228
- yield metrics_output, "", None
229
 
230
  transcription = ""
231
 
@@ -237,12 +239,13 @@ def transcribe_audio(input_source, pipeline_type, model_id, dtype, batch_size, d
237
  )
238
  transcription += transcription_segment
239
  if verbose:
240
- yield metrics_output, transcription, None
241
 
242
  transcription_file = save_transcription(transcription)
243
- yield metrics_output, transcription, transcription_file
244
 
245
  except Exception as e:
 
246
  yield f"An error occurred: {str(e)}", "", None
247
 
248
  finally:
@@ -261,15 +264,6 @@ def update_model_dropdown(pipeline_type):
261
  model_choices = get_model_options(pipeline_type)
262
  return gr.Dropdown.update(choices=model_choices, value=model_choices[0])
263
 
264
- def get_model_options(pipeline_type):
265
- if pipeline_type == "faster-batched":
266
- return ["cstr/whisper-large-v3-turbo-int8_float32"]
267
- elif pipeline_type == "faster-sequenced":
268
- return ["deepdml/faster-whisper-large-v3-turbo-ct2"]
269
- elif pipeline_type == "transformers":
270
- return ["openai/whisper-large-v3"]
271
- return []
272
-
273
  with gr.Blocks() as iface:
274
  gr.Markdown("# Multi-Pipeline Transcription")
275
  gr.Markdown("Transcribe audio using multiple pipelines and models.")
 
15
  logging.basicConfig(level=logging.INFO)
16
 
17
  # Clone and install faster-whisper from GitHub
 
18
  try:
19
  subprocess.run(["git", "clone", "https://github.com/SYSTRAN/faster-whisper.git"], check=True)
20
  subprocess.run(["pip", "install", "-e", "./faster-whisper"], check=True)
21
  except subprocess.CalledProcessError as e:
22
+ logging.error(f"Error during faster-whisper installation: {e}")
23
  sys.exit(1)
24
 
 
25
  sys.path.append("./faster-whisper")
26
 
27
  from faster_whisper import WhisperModel
 
157
 
158
  def get_model_options(pipeline_type):
159
  if pipeline_type == "faster-batched":
160
+ return ["cstr/whisper-large-v3-turbo-int8_float32"]
161
  elif pipeline_type == "faster-sequenced":
162
+ return ["deepdml/faster-whisper-large-v3-turbo-ct2"]
163
  elif pipeline_type == "transformers":
164
+ return ["openai/whisper-large-v3"]
165
+ return []
166
+
 
167
  def transcribe_audio(input_source, pipeline_type, model_id, dtype, batch_size, download_method, start_time=None, end_time=None, verbose=False):
168
  try:
169
+ logging.info(f"Transcription parameters: pipeline_type={pipeline_type}, model_id={model_id}, dtype={dtype}, batch_size={batch_size}, download_method={download_method}")
170
+ verbose_messages = f"Starting transcription with parameters:\nPipeline Type: {pipeline_type}\nModel ID: {model_id}\nData Type: {dtype}\nBatch Size: {batch_size}\nDownload Method: {download_method}\n"
171
+
172
  if pipeline_type == "faster-batched":
173
  model = WhisperModel(model_id, device="auto", compute_type=dtype)
174
  pipeline = BatchedInferencePipeline(model=model)
 
198
 
199
  if isinstance(input_source, str) and (input_source.startswith('http://') or input_source.startswith('https://')):
200
  audio_path = download_audio(input_source, download_method)
201
+ verbose_messages += f"Audio file downloaded: {audio_path}\n"
202
  if audio_path.startswith("Error"):
203
  yield f"Error: {audio_path}", "", None
204
  return
 
208
  if start_time is not None or end_time is not None:
209
  trimmed_audio_path = trim_audio(audio_path, start_time or 0, end_time)
210
  audio_path = trimmed_audio_path
211
+ verbose_messages += f"Audio trimmed from {start_time} to {end_time}\n"
212
 
213
  start_time_perf = time.time()
214
  if pipeline_type in ["faster-batched", "faster-sequenced"]:
 
227
  )
228
 
229
  if verbose:
230
+ yield verbose_messages + metrics_output, "", None
231
 
232
  transcription = ""
233
 
 
239
  )
240
  transcription += transcription_segment
241
  if verbose:
242
+ yield verbose_messages + metrics_output, transcription, None
243
 
244
  transcription_file = save_transcription(transcription)
245
+ yield verbose_messages + metrics_output, transcription, transcription_file
246
 
247
  except Exception as e:
248
+ logging.error(f"An error occurred during transcription: {str(e)}")
249
  yield f"An error occurred: {str(e)}", "", None
250
 
251
  finally:
 
264
  model_choices = get_model_options(pipeline_type)
265
  return gr.Dropdown.update(choices=model_choices, value=model_choices[0])
266
 
 
 
 
 
 
 
 
 
 
267
  with gr.Blocks() as iface:
268
  gr.Markdown("# Multi-Pipeline Transcription")
269
  gr.Markdown("Transcribe audio using multiple pipelines and models.")