Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -173,7 +173,23 @@ def wget_method(url):
|
|
173 |
def trim_audio(audio_path, start_time, end_time):
|
174 |
logging.info(f"Trimming audio from {start_time} to {end_time}")
|
175 |
audio = AudioSegment.from_file(audio_path)
|
176 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
177 |
trimmed_audio_path = tempfile.mktemp(suffix='.wav')
|
178 |
trimmed_audio.export(trimmed_audio_path, format="wav")
|
179 |
logging.info(f"Trimmed audio saved to: {trimmed_audio_path}")
|
@@ -197,6 +213,20 @@ def get_model_options(pipeline_type):
|
|
197 |
return []
|
198 |
|
199 |
def transcribe_audio(input_source, pipeline_type, model_id, dtype, batch_size, download_method, start_time=None, end_time=None, verbose=False):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
200 |
try:
|
201 |
logging.info(f"Transcription parameters: pipeline_type={pipeline_type}, model_id={model_id}, dtype={dtype}, batch_size={batch_size}, download_method={download_method}")
|
202 |
verbose_messages = f"Starting transcription with parameters:\nPipeline Type: {pipeline_type}\nModel ID: {model_id}\nData Type: {dtype}\nBatch Size: {batch_size}\nDownload Method: {download_method}\n"
|
@@ -243,8 +273,11 @@ def transcribe_audio(input_source, pipeline_type, model_id, dtype, batch_size, d
|
|
243 |
else:
|
244 |
audio_path = input_source
|
245 |
|
|
|
|
|
|
|
246 |
if start_time is not None or end_time is not None:
|
247 |
-
trimmed_audio_path = trim_audio(audio_path, start_time
|
248 |
audio_path = trimmed_audio_path
|
249 |
verbose_messages += f"Audio trimmed from {start_time} to {end_time}\n"
|
250 |
if verbose:
|
@@ -289,23 +322,23 @@ def transcribe_audio(input_source, pipeline_type, model_id, dtype, batch_size, d
|
|
289 |
yield f"An error occurred: {str(e)}", "", None
|
290 |
|
291 |
finally:
|
292 |
-
|
293 |
-
|
294 |
-
|
295 |
-
|
296 |
-
|
297 |
-
|
298 |
-
|
299 |
-
|
300 |
-
|
301 |
-
|
302 |
|
303 |
with gr.Blocks() as iface:
|
304 |
gr.Markdown("# Multi-Pipeline Transcription")
|
305 |
gr.Markdown("Transcribe audio using multiple pipelines and models.")
|
306 |
|
307 |
with gr.Row():
|
308 |
-
input_source = gr.
|
309 |
pipeline_type = gr.Dropdown(
|
310 |
choices=["faster-batched", "faster-sequenced", "transformers"],
|
311 |
label="Pipeline Type",
|
@@ -327,8 +360,8 @@ with gr.Blocks() as iface:
|
|
327 |
)
|
328 |
|
329 |
with gr.Row():
|
330 |
-
start_time = gr.Number(label="Start Time (seconds)", value=0)
|
331 |
-
end_time = gr.Number(label="End Time (seconds)", value=0)
|
332 |
verbose = gr.Checkbox(label="Verbose Output", value=True) # Set to True by default
|
333 |
|
334 |
transcribe_button = gr.Button("Transcribe")
|
@@ -366,9 +399,9 @@ with gr.Blocks() as iface:
|
|
366 |
|
367 |
gr.Examples(
|
368 |
examples=[
|
369 |
-
|
370 |
-
|
371 |
-
|
372 |
],
|
373 |
inputs=[input_source, pipeline_type, model_id, dtype, batch_size, download_method, start_time, end_time, verbose],
|
374 |
)
|
|
|
173 |
def trim_audio(audio_path, start_time, end_time):
|
174 |
logging.info(f"Trimming audio from {start_time} to {end_time}")
|
175 |
audio = AudioSegment.from_file(audio_path)
|
176 |
+
audio_duration = len(audio) / 1000 # Duration in seconds
|
177 |
+
|
178 |
+
# Default start and end times if None
|
179 |
+
if start_time is None:
|
180 |
+
start_time = 0
|
181 |
+
if end_time is None or end_time > audio_duration:
|
182 |
+
end_time = audio_duration
|
183 |
+
|
184 |
+
# Validate times
|
185 |
+
if start_time < 0 or end_time < 0:
|
186 |
+
raise ValueError("Start time and end time must be non-negative.")
|
187 |
+
if start_time >= end_time:
|
188 |
+
raise gr.Error("End time must be greater than start time.")
|
189 |
+
if start_time > audio_duration:
|
190 |
+
raise ValueError("Start time exceeds audio duration.")
|
191 |
+
|
192 |
+
trimmed_audio = audio[start_time * 1000:end_time * 1000]
|
193 |
trimmed_audio_path = tempfile.mktemp(suffix='.wav')
|
194 |
trimmed_audio.export(trimmed_audio_path, format="wav")
|
195 |
logging.info(f"Trimmed audio saved to: {trimmed_audio_path}")
|
|
|
213 |
return []
|
214 |
|
215 |
def transcribe_audio(input_source, pipeline_type, model_id, dtype, batch_size, download_method, start_time=None, end_time=None, verbose=False):
|
216 |
+
try:
|
217 |
+
# Determine if input_source is a URL or file
|
218 |
+
if isinstance(input_source, str):
|
219 |
+
if input_source.startswith('http://') or input_source.startswith('https://'):
|
220 |
+
audio_path = download_audio(input_source, download_method)
|
221 |
+
# Handle potential errors during download
|
222 |
+
if not audio_path or audio_path.startswith("Error"):
|
223 |
+
yield f"Error: {audio_path}", "", None
|
224 |
+
return
|
225 |
+
else:
|
226 |
+
# Assume input_source is an uploaded file object
|
227 |
+
audio_path = input_source.name
|
228 |
+
logging.info(f"Using uploaded audio file: {audio_path}")
|
229 |
+
|
230 |
try:
|
231 |
logging.info(f"Transcription parameters: pipeline_type={pipeline_type}, model_id={model_id}, dtype={dtype}, batch_size={batch_size}, download_method={download_method}")
|
232 |
verbose_messages = f"Starting transcription with parameters:\nPipeline Type: {pipeline_type}\nModel ID: {model_id}\nData Type: {dtype}\nBatch Size: {batch_size}\nDownload Method: {download_method}\n"
|
|
|
273 |
else:
|
274 |
audio_path = input_source
|
275 |
|
276 |
+
start_time = float(start_time) if start_time else None
|
277 |
+
end_time = float(end_time) if end_time else None
|
278 |
+
|
279 |
if start_time is not None or end_time is not None:
|
280 |
+
trimmed_audio_path = trim_audio(audio_path, start_time, end_time)
|
281 |
audio_path = trimmed_audio_path
|
282 |
verbose_messages += f"Audio trimmed from {start_time} to {end_time}\n"
|
283 |
if verbose:
|
|
|
322 |
yield f"An error occurred: {str(e)}", "", None
|
323 |
|
324 |
finally:
|
325 |
+
# Remove downloaded audio file
|
326 |
+
if audio_path and os.path.exists(audio_path):
|
327 |
+
os.remove(audio_path)
|
328 |
+
# Remove trimmed audio file
|
329 |
+
if 'trimmed_audio_path' in locals() and os.path.exists(trimmed_audio_path):
|
330 |
+
os.remove(trimmed_audio_path)
|
331 |
+
# Remove transcription file if needed
|
332 |
+
if transcription_file and os.path.exists(transcription_file):
|
333 |
+
os.remove(transcription_file)
|
334 |
+
|
335 |
|
336 |
with gr.Blocks() as iface:
|
337 |
gr.Markdown("# Multi-Pipeline Transcription")
|
338 |
gr.Markdown("Transcribe audio using multiple pipelines and models.")
|
339 |
|
340 |
with gr.Row():
|
341 |
+
input_source = gr.File(label="Audio Source (Upload a file or enter a URL/YouTube URL)")
|
342 |
pipeline_type = gr.Dropdown(
|
343 |
choices=["faster-batched", "faster-sequenced", "transformers"],
|
344 |
label="Pipeline Type",
|
|
|
360 |
)
|
361 |
|
362 |
with gr.Row():
|
363 |
+
start_time = gr.Number(label="Start Time (seconds)", value=None, minimum=0)
|
364 |
+
end_time = gr.Number(label="End Time (seconds)", value=None, minimum=0)
|
365 |
verbose = gr.Checkbox(label="Verbose Output", value=True) # Set to True by default
|
366 |
|
367 |
transcribe_button = gr.Button("Transcribe")
|
|
|
399 |
|
400 |
gr.Examples(
|
401 |
examples=[
|
402 |
+
["https://www.youtube.com/watch?v=daQ_hqA6HDo", "faster-batched", "cstr/whisper-large-v3-turbo-int8_float32", "int8", 16, "yt-dlp", None, None, True],
|
403 |
+
["https://mcdn.podbean.com/mf/web/dir5wty678b6g4vg/HoP_453_-_The_Price_is_Right_-_Law_and_Economics_in_the_Second_Scholastic5yxzh.mp3", "faster-sequenced", "deepdml/faster-whisper-large-v3-turbo-ct2", "float16", 1, "ffmpeg", 0, 300, True],
|
404 |
+
[None, "transformers", "openai/whisper-large-v3", "float16", 16, "yt-dlp", 60, 180, True]
|
405 |
],
|
406 |
inputs=[input_source, pipeline_type, model_id, dtype, batch_size, download_method, start_time, end_time, verbose],
|
407 |
)
|