app.py
CHANGED
@@ -45,11 +45,11 @@ logging.info(f"Using device: {device}")
 def download_audio(url, method_choice):
     """
     Downloads audio from a given URL using the specified method.
-
+
     Args:
         url (str): The URL of the audio.
         method_choice (str): The method to use for downloading audio.
-
+
     Returns:
         tuple: (path to the downloaded audio file, is_temp_file), or (error message, False).
     """
@@ -64,11 +64,14 @@ def download_audio(url, method_choice):
             audio_file = download_direct_audio(url, method_choice)
 
         if not audio_file or not os.path.exists(audio_file):
-
+            error_msg = f"Failed to download audio from {url} using method {method_choice}"
+            logging.error(error_msg)
+            return error_msg, False
         return audio_file, True
     except Exception as e:
-
-
+        error_msg = f"Error downloading audio from {url} using method {method_choice}: {str(e)}"
+        logging.error(error_msg)
+        return error_msg, False
 
 def download_youtube_audio(url, method_choice):
     """
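With this hunk, download_audio reports failures in-band instead of raising: success returns (path, is_temp_file) and failure returns (error_message, False). A minimal caller sketch under that assumption; `handle_audio` is a placeholder, not a function from app.py, and os/logging are assumed to be imported as they are in the module:

audio_path, is_temp = download_audio(url, method_choice)
if not audio_path or not os.path.exists(audio_path):
    # On failure the first element is an error message rather than a file path.
    logging.error(f"Could not obtain audio: {audio_path}")
else:
    handle_audio(audio_path)      # placeholder for downstream processing
    if is_temp:
        os.remove(audio_path)     # the caller cleans up temporary downloads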
@@ -114,15 +117,20 @@ def yt_dlp_method(url):
             'preferredcodec': 'mp3',
             'preferredquality': '192',
         }],
-        'quiet':
+        'quiet': False,
         'no_warnings': True,
+        'logger': logging.getLogger(),  # Capture yt-dlp logs
     }
-
-
-
-
-
-
+    try:
+        with yt_dlp.YoutubeDL(ydl_opts) as ydl:
+            info = ydl.extract_info(url, download=True)
+            output_file = ydl.prepare_filename(info)
+            output_file = os.path.splitext(output_file)[0] + '.mp3'
+            logging.info(f"Downloaded YouTube audio: {output_file}")
+            return output_file
+    except Exception as e:
+        logging.error(f"Error in yt_dlp_method: {str(e)}")
+        raise Exception(f"yt-dlp failed to download audio: {str(e)}")
 
 def pytube_method(url):
     """
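The extension rewrite after prepare_filename() is needed because prepare_filename() names the originally downloaded container (e.g. .webm or .m4a), while the FFmpegExtractAudio postprocessor transcodes the result to MP3. A standalone sketch of the same pattern; the outtmpl and function name are illustrative, not taken from app.py:

import os
import yt_dlp

def fetch_mp3(url, out_dir="."):
    # Download best audio and let ffmpeg transcode it to MP3.
    opts = {
        "format": "bestaudio/best",
        "outtmpl": os.path.join(out_dir, "%(id)s.%(ext)s"),  # assumed output template
        "postprocessors": [{
            "key": "FFmpegExtractAudio",
            "preferredcodec": "mp3",
            "preferredquality": "192",
        }],
    }
    with yt_dlp.YoutubeDL(opts) as ydl:
        info = ydl.extract_info(url, download=True)
        # prepare_filename() still reflects the pre-transcode file, so swap the extension.
        return os.path.splitext(ydl.prepare_filename(info))[0] + ".mp3"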
@@ -136,15 +144,24 @@ def pytube_method(url):
     """
     logging.info("Using pytube method")
     from pytube import YouTube
-
-
-
-
-
-
-
-
-
+    try:
+        yt = YouTube(url)
+        audio_stream = yt.streams.filter(only_audio=True).first()
+        if audio_stream is None:
+            error_msg = "No audio streams available with pytube."
+            logging.error(error_msg)
+            raise Exception(error_msg)
+        temp_dir = tempfile.mkdtemp()
+        out_file = audio_stream.download(output_path=temp_dir)
+        base, ext = os.path.splitext(out_file)
+        new_file = base + '.mp3'
+        os.rename(out_file, new_file)
+        logging.info(f"Downloaded and converted audio to: {new_file}")
+        return new_file
+    except Exception as e:
+        logging.error(f"Error in pytube_method: {str(e)}")
+        raise Exception(f"pytube failed to download audio: {str(e)}")
+
 
 def download_rtsp_audio(url):
     """
@@ -173,11 +190,11 @@ def download_rtsp_audio(url):
 def download_direct_audio(url, method_choice):
     """
     Downloads audio from a direct URL using the specified method.
-
+
     Args:
         url (str): The direct URL of the audio file.
         method_choice (str): The method to use for downloading.
-
+
     Returns:
         str: Path to the downloaded audio file, or None if failed.
     """
@@ -191,9 +208,14 @@ def download_direct_audio(url, method_choice):
     }
     method = methods.get(method_choice, requests_method)
     try:
-
+        audio_file = method(url)
+        if not audio_file or not os.path.exists(audio_file):
+            error_msg = f"Failed to download direct audio from {url} using method {method_choice}"
+            logging.error(error_msg)
+            return None
+        return audio_file
     except Exception as e:
-        logging.error(f"Error downloading direct audio: {str(e)}")
+        logging.error(f"Error downloading direct audio with {method_choice}: {str(e)}")
         return None
 
 def requests_method(url):
@@ -402,10 +424,10 @@ loaded_models = {}
 
 def transcribe_audio(input_source, pipeline_type, model_id, dtype, batch_size, download_method, start_time=None, end_time=None, verbose=False):
     """
-    Transcribes audio from a given
+    Transcribes audio from a given source using the specified pipeline and model.
 
     Args:
-        input_source (str): URL of
+        input_source (str or file): URL of audio, path to local file, or uploaded file object.
         pipeline_type (str): Type of pipeline to use ('faster-batched', 'faster-sequenced', or 'transformers').
         model_id (str): The ID of the model to use.
         dtype (str): Data type for model computations ('int8', 'float16', or 'float32').
@@ -430,22 +452,36 @@ def transcribe_audio(input_source, pipeline_type, model_id, dtype, batch_size, d
         if verbose:
             yield verbose_messages, "", None
 
-        #
-
-
-
-
-
-
-
-
+        # Determine if input_source is a URL or file
+        audio_path = None
+        is_temp_file = False
+
+        if isinstance(input_source, str) and (input_source.startswith('http://') or input_source.startswith('https://')):
+            # Input source is a URL
+            audio_path, is_temp_file = download_audio(input_source, download_method)
+            if not audio_path or audio_path.startswith("Error"):
+                error_msg = f"Error downloading audio: {audio_path}"
+                logging.error(error_msg)
+                yield error_msg, "", None
+                return
+        elif isinstance(input_source, str) and os.path.exists(input_source):
+            # Input source is a local file path
+            audio_path = input_source
+            is_temp_file = False
+        elif hasattr(input_source, 'name'):
+            # Input source is an uploaded file object
+            audio_path = input_source.name
+            is_temp_file = False
+        else:
+            error_msg = "No valid audio source provided."
+            logging.error(error_msg)
+            yield error_msg, "", None
             return
 
         # Convert start_time and end_time to float or None
         start_time = float(start_time) if start_time else None
         end_time = float(end_time) if end_time else None
 
-        # Trim the audio if start or end times are provided
         if start_time is not None or end_time is not None:
            audio_path = trim_audio(audio_path, start_time, end_time)
            is_temp_file = True  # The trimmed audio is a temporary file
@@ -459,7 +495,6 @@ def transcribe_audio(input_source, pipeline_type, model_id, dtype, batch_size, d
             model_or_pipeline = loaded_models[model_key]
             logging.info("Loaded model from cache")
         else:
-            # Load the appropriate model or pipeline based on the pipeline type
             if pipeline_type == "faster-batched":
                 model = WhisperModel(model_id, device=device, compute_type=dtype)
                 model_or_pipeline = BatchedInferencePipeline(model=model)
@@ -489,10 +524,11 @@ def transcribe_audio(input_source, pipeline_type, model_id, dtype, batch_size, d
                     device=device,
                 )
             else:
-
+                error_msg = "Invalid pipeline type"
+                logging.error(error_msg)
+                raise ValueError(error_msg)
             loaded_models[model_key] = model_or_pipeline  # Cache the model or pipeline
 
-        # Perform the transcription
         start_time_perf = time.time()
         if pipeline_type == "faster-batched":
             segments, info = model_or_pipeline.transcribe(audio_path, batch_size=batch_size)
@@ -503,7 +539,6 @@ def transcribe_audio(input_source, pipeline_type, model_id, dtype, batch_size, d
             segments = result["chunks"]
         end_time_perf = time.time()
 
-        # Calculate metrics
         transcription_time = end_time_perf - start_time_perf
         audio_file_size = os.path.getsize(audio_path) / (1024 * 1024)
 
@@ -515,7 +550,6 @@ def transcribe_audio(input_source, pipeline_type, model_id, dtype, batch_size, d
         if verbose:
             yield verbose_messages + metrics_output, "", None
 
-        # Compile the transcription text
         transcription = ""
 
         for segment in segments:
@@ -527,13 +561,13 @@ def transcribe_audio(input_source, pipeline_type, model_id, dtype, batch_size, d
         if verbose:
             yield verbose_messages + metrics_output, transcription, None
 
-        # Save the transcription to a file
         transcription_file = save_transcription(transcription)
         yield verbose_messages + metrics_output, transcription, transcription_file
 
     except Exception as e:
-
-
+        error_msg = f"An error occurred during transcription: {str(e)}"
+        logging.error(error_msg)
+        yield error_msg, "", None
 
     finally:
         # Clean up temporary audio files
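Since transcribe_audio is a generator that yields (status_text, transcription, transcript_file) tuples, a caller can stream progress and pick up the transcript path from the final yield. A hedged sketch of such a caller; the URL, model ID, batch size, and download method below are assumed values for illustration, not ones the diff prescribes:

# Consumes the generator defined in app.py; illustrative argument values only.
last_file = None
for status, transcription, transcript_file in transcribe_audio(
    "https://example.com/episode.mp3",   # or a local path, or an uploaded file object
    pipeline_type="faster-batched",
    model_id="large-v3",                 # assumed model ID
    dtype="int8",
    batch_size=8,                        # assumed batch size
    download_method="yt-dlp",            # assumed method name
    verbose=True,
):
    print(status)
    if transcript_file:
        last_file = transcript_file
print("Transcript saved to:", last_file)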