cstr committed on
Commit
f1aba6f
·
verified ·
1 Parent(s): fa54222

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +24 -19
app.py CHANGED
@@ -51,7 +51,7 @@ def download_audio(url, method_choice):
51
  method_choice (str): The method to use for downloading audio.
52
 
53
  Returns:
54
- tuple: (path to the downloaded audio file, is_temp_file), or (error message, False).
55
  """
56
  parsed_url = urlparse(url)
57
  logging.info(f"Downloading audio from URL: {url} using method: {method_choice}")
@@ -66,21 +66,21 @@ def download_audio(url, method_choice):
66
  if not audio_file or not os.path.exists(audio_file):
67
  error_msg = f"Failed to download audio from {url} using method {method_choice}"
68
  logging.error(error_msg)
69
- return error_msg, False
70
  return audio_file, True
71
  except Exception as e:
72
  error_msg = f"Error downloading audio from {url} using method {method_choice}: {str(e)}"
73
  logging.error(error_msg)
74
- return error_msg, False
75
 
76
  def download_youtube_audio(url, method_choice):
77
  """
78
  Downloads audio from a YouTube URL using the specified method.
79
-
80
  Args:
81
  url (str): The YouTube URL.
82
  method_choice (str): The method to use for downloading.
83
-
84
  Returns:
85
  str: Path to the downloaded audio file, or None if failed.
86
  """
@@ -104,7 +104,7 @@ def yt_dlp_method(url):
104
  url (str): The YouTube URL.
105
 
106
  Returns:
107
- str: Path to the downloaded audio file.
108
  """
109
  logging.info("Using yt-dlp method")
110
  temp_dir = tempfile.mkdtemp()
@@ -129,8 +129,8 @@ def yt_dlp_method(url):
129
  logging.info(f"Downloaded YouTube audio: {output_file}")
130
  return output_file
131
  except Exception as e:
132
- logging.error(f"Error in yt_dlp_method: {str(e)}")
133
- raise Exception(f"yt-dlp failed to download audio: {str(e)}")
134
 
135
  def pytube_method(url):
136
  """
@@ -140,7 +140,7 @@ def pytube_method(url):
140
  url (str): The YouTube URL.
141
 
142
  Returns:
143
- str: Path to the downloaded audio file.
144
  """
145
  logging.info("Using pytube method")
146
  from pytube import YouTube
@@ -150,7 +150,7 @@ def pytube_method(url):
150
  if audio_stream is None:
151
  error_msg = "No audio streams available with pytube."
152
  logging.error(error_msg)
153
- raise Exception(error_msg)
154
  temp_dir = tempfile.mkdtemp()
155
  out_file = audio_stream.download(output_path=temp_dir)
156
  base, ext = os.path.splitext(out_file)
@@ -159,9 +159,9 @@ def pytube_method(url):
159
  logging.info(f"Downloaded and converted audio to: {new_file}")
160
  return new_file
161
  except Exception as e:
162
- logging.error(f"Error in pytube_method: {str(e)}")
163
- raise Exception(f"pytube failed to download audio: {str(e)}")
164
-
165
 
166
  def download_rtsp_audio(url):
167
  """
@@ -459,10 +459,10 @@ def transcribe_audio(input_source, pipeline_type, model_id, dtype, batch_size, d
459
  if isinstance(input_source, str) and (input_source.startswith('http://') or input_source.startswith('https://')):
460
  # Input source is a URL
461
  audio_path, is_temp_file = download_audio(input_source, download_method)
462
- if not audio_path or audio_path.startswith("Error"):
463
- error_msg = f"Error downloading audio: {audio_path}"
464
  logging.error(error_msg)
465
- yield error_msg, "", None
466
  return
467
  elif isinstance(input_source, str) and os.path.exists(input_source):
468
  # Input source is a local file path
@@ -475,7 +475,7 @@ def transcribe_audio(input_source, pipeline_type, model_id, dtype, batch_size, d
475
  else:
476
  error_msg = "No valid audio source provided."
477
  logging.error(error_msg)
478
- yield error_msg, "", None
479
  return
480
 
481
  # Convert start_time and end_time to float or None
@@ -526,9 +526,11 @@ def transcribe_audio(input_source, pipeline_type, model_id, dtype, batch_size, d
526
  else:
527
  error_msg = "Invalid pipeline type"
528
  logging.error(error_msg)
529
- raise ValueError(error_msg)
 
530
  loaded_models[model_key] = model_or_pipeline # Cache the model or pipeline
531
 
 
532
  start_time_perf = time.time()
533
  if pipeline_type == "faster-batched":
534
  segments, info = model_or_pipeline.transcribe(audio_path, batch_size=batch_size)
@@ -539,6 +541,7 @@ def transcribe_audio(input_source, pipeline_type, model_id, dtype, batch_size, d
539
  segments = result["chunks"]
540
  end_time_perf = time.time()
541
 
 
542
  transcription_time = end_time_perf - start_time_perf
543
  audio_file_size = os.path.getsize(audio_path) / (1024 * 1024)
544
 
@@ -550,6 +553,7 @@ def transcribe_audio(input_source, pipeline_type, model_id, dtype, batch_size, d
550
  if verbose:
551
  yield verbose_messages + metrics_output, "", None
552
 
 
553
  transcription = ""
554
 
555
  for segment in segments:
@@ -561,13 +565,14 @@ def transcribe_audio(input_source, pipeline_type, model_id, dtype, batch_size, d
561
  if verbose:
562
  yield verbose_messages + metrics_output, transcription, None
563
 
 
564
  transcription_file = save_transcription(transcription)
565
  yield verbose_messages + metrics_output, transcription, transcription_file
566
 
567
  except Exception as e:
568
  error_msg = f"An error occurred during transcription: {str(e)}"
569
  logging.error(error_msg)
570
- yield error_msg, "", None
571
 
572
  finally:
573
  # Clean up temporary audio files
 
51
  method_choice (str): The method to use for downloading audio.
52
 
53
  Returns:
54
+ tuple: (path to the downloaded audio file, is_temp_file), or (None, False) if failed.
55
  """
56
  parsed_url = urlparse(url)
57
  logging.info(f"Downloading audio from URL: {url} using method: {method_choice}")
 
66
  if not audio_file or not os.path.exists(audio_file):
67
  error_msg = f"Failed to download audio from {url} using method {method_choice}"
68
  logging.error(error_msg)
69
+ return None, False
70
  return audio_file, True
71
  except Exception as e:
72
  error_msg = f"Error downloading audio from {url} using method {method_choice}: {str(e)}"
73
  logging.error(error_msg)
74
+ return None, False
75
 
76
  def download_youtube_audio(url, method_choice):
77
  """
78
  Downloads audio from a YouTube URL using the specified method.
79
+
80
  Args:
81
  url (str): The YouTube URL.
82
  method_choice (str): The method to use for downloading.
83
+
84
  Returns:
85
  str: Path to the downloaded audio file, or None if failed.
86
  """
 
104
  url (str): The YouTube URL.
105
 
106
  Returns:
107
+ str: Path to the downloaded audio file, or None if failed.
108
  """
109
  logging.info("Using yt-dlp method")
110
  temp_dir = tempfile.mkdtemp()
 
129
  logging.info(f"Downloaded YouTube audio: {output_file}")
130
  return output_file
131
  except Exception as e:
132
+ logging.error(f"yt-dlp failed to download audio: {str(e)}")
133
+ return None
134
 
135
  def pytube_method(url):
136
  """
 
140
  url (str): The YouTube URL.
141
 
142
  Returns:
143
+ str: Path to the downloaded audio file, or None if failed.
144
  """
145
  logging.info("Using pytube method")
146
  from pytube import YouTube
 
150
  if audio_stream is None:
151
  error_msg = "No audio streams available with pytube."
152
  logging.error(error_msg)
153
+ return None
154
  temp_dir = tempfile.mkdtemp()
155
  out_file = audio_stream.download(output_path=temp_dir)
156
  base, ext = os.path.splitext(out_file)
 
159
  logging.info(f"Downloaded and converted audio to: {new_file}")
160
  return new_file
161
  except Exception as e:
162
+ logging.error(f"pytube failed to download audio: {str(e)}")
163
+ return None
164
+
165
 
166
  def download_rtsp_audio(url):
167
  """
 
459
  if isinstance(input_source, str) and (input_source.startswith('http://') or input_source.startswith('https://')):
460
  # Input source is a URL
461
  audio_path, is_temp_file = download_audio(input_source, download_method)
462
+ if not audio_path:
463
+ error_msg = f"Error downloading audio from {input_source} using method {download_method}"
464
  logging.error(error_msg)
465
+ yield verbose_messages + error_msg, "", None
466
  return
467
  elif isinstance(input_source, str) and os.path.exists(input_source):
468
  # Input source is a local file path
 
475
  else:
476
  error_msg = "No valid audio source provided."
477
  logging.error(error_msg)
478
+ yield verbose_messages + error_msg, "", None
479
  return
480
 
481
  # Convert start_time and end_time to float or None
 
526
  else:
527
  error_msg = "Invalid pipeline type"
528
  logging.error(error_msg)
529
+ yield verbose_messages + error_msg, "", None
530
+ return
531
  loaded_models[model_key] = model_or_pipeline # Cache the model or pipeline
532
 
533
+ # Perform the transcription
534
  start_time_perf = time.time()
535
  if pipeline_type == "faster-batched":
536
  segments, info = model_or_pipeline.transcribe(audio_path, batch_size=batch_size)
 
541
  segments = result["chunks"]
542
  end_time_perf = time.time()
543
 
544
+ # Calculate metrics
545
  transcription_time = end_time_perf - start_time_perf
546
  audio_file_size = os.path.getsize(audio_path) / (1024 * 1024)
547
 
 
553
  if verbose:
554
  yield verbose_messages + metrics_output, "", None
555
 
556
+ # Compile the transcription text
557
  transcription = ""
558
 
559
  for segment in segments:
 
565
  if verbose:
566
  yield verbose_messages + metrics_output, transcription, None
567
 
568
+ # Save the transcription to a file
569
  transcription_file = save_transcription(transcription)
570
  yield verbose_messages + metrics_output, transcription, transcription_file
571
 
572
  except Exception as e:
573
  error_msg = f"An error occurred during transcription: {str(e)}"
574
  logging.error(error_msg)
575
+ yield verbose_messages + error_msg, "", None
576
 
577
  finally:
578
  # Clean up temporary audio files