Upload folder using huggingface_hub
Browse files- src/ui/__pycache__/gradio_ui.cpython-310.pyc +0 -0
- src/ui/gradio_ui.py +363 -342
src/ui/__pycache__/gradio_ui.cpython-310.pyc
CHANGED
Binary files a/src/ui/__pycache__/gradio_ui.cpython-310.pyc and b/src/ui/__pycache__/gradio_ui.cpython-310.pyc differ
|
|
src/ui/gradio_ui.py
CHANGED
@@ -5,10 +5,10 @@ Contains all UI components and interface logic
|
|
5 |
|
6 |
import gradio as gr
|
7 |
import asyncio
|
|
|
8 |
from ..tools import mcp_tools
|
9 |
from ..tools.download_tools import get_file_info_tool, get_mp3_files_tool, read_text_file_segments_tool
|
10 |
from ..tools.transcription_tools import transcribe_audio_file_tool
|
11 |
-
import os
|
12 |
|
13 |
def write_text_file_content(file_path: str, content: str, mode: str = "w", position: int = None):
|
14 |
"""Simple text file writing function"""
|
@@ -33,6 +33,73 @@ def write_text_file_content(file_path: str, content: str, mode: str = "w", posit
|
|
33 |
"error_message": str(e)
|
34 |
}
|
35 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
36 |
def create_gradio_interface():
|
37 |
"""Create Gradio interface
|
38 |
|
@@ -128,81 +195,97 @@ def create_gradio_interface():
|
|
128 |
info="Identify different speakers (requires Hugging Face Token)"
|
129 |
)
|
130 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
131 |
download_btn = gr.Button("π₯ Start Download", variant="primary")
|
132 |
result_output = gr.JSON(label="Download Results")
|
133 |
|
134 |
-
async def download_podcast_and_transcribe(url, platform, auto_transcribe, enable_speaker):
|
135 |
"""Call corresponding download tool based on selected platform"""
|
136 |
-
|
137 |
-
|
138 |
-
else:
|
139 |
-
download_result = await mcp_tools.download_xyz_podcast(url)
|
140 |
-
|
141 |
-
# 2. Check if download was successful
|
142 |
-
if download_result["status"] != "success":
|
143 |
-
return {
|
144 |
-
"download_status": "failed",
|
145 |
-
"error_message": download_result.get("error_message", "Download failed"),
|
146 |
-
"transcription_status": "not_started"
|
147 |
-
}
|
148 |
-
|
149 |
-
# 3. If not auto-transcribing, return only download results
|
150 |
-
if not auto_transcribe:
|
151 |
-
return {
|
152 |
-
"download_status": "success",
|
153 |
-
"audio_file": download_result["audio_file_path"],
|
154 |
-
"transcription_status": "skipped (user chose not to auto-transcribe)"
|
155 |
-
}
|
156 |
|
157 |
-
# 4. Start transcription
|
158 |
try:
|
159 |
-
|
160 |
-
|
161 |
-
|
162 |
-
|
163 |
-
model_size="turbo",
|
164 |
-
language=None,
|
165 |
-
output_format="srt",
|
166 |
-
enable_speaker_diarization=enable_speaker
|
167 |
-
)
|
168 |
|
169 |
-
#
|
170 |
-
|
171 |
-
|
172 |
-
|
173 |
-
|
174 |
-
|
175 |
-
"srt_file_path": transcribe_result.get("srt_file_path"),
|
176 |
-
"transcription_details": {
|
177 |
-
"model_used": transcribe_result.get("model_used"),
|
178 |
-
"segment_count": transcribe_result.get("segment_count"),
|
179 |
-
"audio_duration": transcribe_result.get("audio_duration"),
|
180 |
-
"saved_files": transcribe_result.get("saved_files", []),
|
181 |
-
"speaker_diarization_enabled": transcribe_result.get("speaker_diarization_enabled", False)
|
182 |
}
|
183 |
-
}
|
184 |
|
185 |
-
#
|
186 |
-
if
|
187 |
-
|
188 |
-
"
|
189 |
-
"
|
|
|
190 |
}
|
191 |
|
192 |
-
|
193 |
-
|
194 |
-
|
195 |
-
|
196 |
-
|
197 |
-
|
198 |
-
|
199 |
-
|
200 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
201 |
|
202 |
# Bind callback function
|
203 |
download_btn.click(
|
204 |
download_podcast_and_transcribe,
|
205 |
-
inputs=[url_input, platform_choice, auto_transcribe, enable_speaker_diarization],
|
206 |
outputs=result_output
|
207 |
)
|
208 |
|
@@ -236,16 +319,25 @@ def create_gradio_interface():
|
|
236 |
)
|
237 |
|
238 |
with gr.Row():
|
239 |
-
|
240 |
-
|
241 |
-
|
242 |
-
|
243 |
-
|
244 |
-
|
245 |
-
|
246 |
-
|
247 |
-
|
248 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
249 |
|
250 |
transcribe_btn = gr.Button("π€ Start Transcription", variant="primary", size="lg")
|
251 |
|
@@ -273,56 +365,64 @@ def create_gradio_interface():
|
|
273 |
visible=False
|
274 |
)
|
275 |
|
276 |
-
def perform_transcription(audio_path, model_size, language, output_format, enable_speaker):
|
277 |
"""Execute audio transcription"""
|
278 |
if not audio_path.strip():
|
279 |
return {
|
280 |
"error": "Please enter audio file path"
|
281 |
}, "Transcription failed: No audio file selected", gr.update(visible=False)
|
282 |
|
283 |
-
#
|
284 |
-
|
285 |
-
file_info = asyncio.run(get_file_info_tool(audio_path))
|
286 |
-
if file_info["status"] != "success":
|
287 |
-
return {
|
288 |
-
"error": f"File does not exist or cannot be accessed: {file_info.get('error_message', 'Unknown error')}"
|
289 |
-
}, "Transcription failed: File inaccessible", gr.update(visible=False)
|
290 |
|
291 |
try:
|
292 |
-
#
|
293 |
-
|
|
|
|
|
|
|
|
|
|
|
294 |
|
295 |
-
|
296 |
-
|
297 |
-
|
298 |
-
|
299 |
-
|
300 |
-
|
301 |
-
|
302 |
-
|
303 |
-
|
304 |
-
|
305 |
-
|
306 |
-
|
|
|
|
|
|
|
|
|
307 |
π Generated files: {len(result.get('saved_files', []))} files
|
308 |
π΅ Audio duration: {result.get('audio_duration', 0):.2f} seconds
|
309 |
π Transcription segments: {result.get('segment_count', 0)} segments
|
310 |
π― Model used: {result.get('model_used', 'N/A')}
|
311 |
π Speaker diarization: {'Enabled' if result.get('speaker_diarization_enabled', False) else 'Disabled'}"""
|
312 |
-
|
313 |
-
|
314 |
-
|
315 |
-
|
316 |
-
|
317 |
-
|
318 |
-
|
319 |
-
|
320 |
-
|
321 |
-
|
322 |
-
|
323 |
-
|
324 |
-
|
325 |
-
|
|
|
|
|
|
|
|
|
326 |
|
327 |
# Bind transcription button
|
328 |
transcribe_btn.click(
|
@@ -332,7 +432,8 @@ def create_gradio_interface():
|
|
332 |
model_size_choice,
|
333 |
language_choice,
|
334 |
output_format_choice,
|
335 |
-
enable_speaker_separation
|
|
|
336 |
],
|
337 |
outputs=[
|
338 |
transcribe_result_output,
|
@@ -345,14 +446,43 @@ def create_gradio_interface():
|
|
345 |
with gr.Tab("MP3 File Management"):
|
346 |
gr.Markdown("### π΅ MP3 File Management")
|
347 |
|
348 |
-
|
349 |
-
|
350 |
-
|
351 |
-
|
352 |
-
|
353 |
-
|
354 |
-
|
355 |
-
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
356 |
|
357 |
file_list = gr.Textbox(
|
358 |
label="MP3 File List",
|
@@ -365,260 +495,151 @@ def create_gradio_interface():
|
|
365 |
|
366 |
def list_mp3_files(directory):
|
367 |
"""List MP3 files in directory"""
|
368 |
-
|
369 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
370 |
|
371 |
-
|
372 |
-
|
373 |
-
|
374 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
375 |
outputs=[file_list]
|
376 |
)
|
377 |
|
378 |
# ==================== Transcription Text Management Tab ====================
|
379 |
with gr.Tab("Transcription Text Management"):
|
380 |
gr.Markdown("### π Transcription Text File Management")
|
381 |
-
gr.Markdown("
|
382 |
|
383 |
-
|
384 |
-
|
385 |
-
|
386 |
-
|
387 |
-
|
388 |
-
|
389 |
-
lines=1
|
390 |
-
)
|
391 |
-
|
392 |
-
# File information display
|
393 |
-
file_info_output = gr.JSON(label="File Information", visible=False)
|
394 |
-
|
395 |
-
with gr.Row():
|
396 |
-
load_file_btn = gr.Button("π Load File", variant="secondary")
|
397 |
-
save_file_btn = gr.Button("πΎ Save File", variant="primary")
|
398 |
-
refresh_btn = gr.Button("π Refresh", variant="secondary")
|
399 |
-
|
400 |
-
with gr.Column(scale=1):
|
401 |
-
# Read control
|
402 |
-
gr.Markdown("#### π Segmented Reading Control")
|
403 |
-
current_position = gr.Number(
|
404 |
-
label="Current Position (bytes)",
|
405 |
-
value=0,
|
406 |
-
minimum=0
|
407 |
-
)
|
408 |
-
chunk_size = gr.Number(
|
409 |
-
label="Chunk Size (bytes)",
|
410 |
-
value=65536, # 64KB
|
411 |
-
minimum=1024,
|
412 |
-
maximum=1048576 # Max 1MB
|
413 |
-
)
|
414 |
-
|
415 |
-
with gr.Row():
|
416 |
-
prev_chunk_btn = gr.Button("β¬
οΈ Previous", size="sm")
|
417 |
-
next_chunk_btn = gr.Button("β‘οΈ Next", size="sm")
|
418 |
-
|
419 |
-
# Progress display
|
420 |
-
progress_display = gr.Textbox(
|
421 |
-
label="Reading Progress",
|
422 |
-
value="No file loaded",
|
423 |
-
interactive=False,
|
424 |
-
lines=3
|
425 |
-
)
|
426 |
-
|
427 |
-
# Write control
|
428 |
-
gr.Markdown("#### βοΈ Write Control")
|
429 |
-
write_mode = gr.Radio(
|
430 |
-
choices=["w", "a", "r+"],
|
431 |
-
value="w",
|
432 |
-
label="Write Mode",
|
433 |
-
info="w=overwrite, a=append, r+=position"
|
434 |
-
)
|
435 |
-
write_position = gr.Number(
|
436 |
-
label="Write Position (bytes)",
|
437 |
-
value=0,
|
438 |
-
minimum=0,
|
439 |
-
visible=False
|
440 |
-
)
|
441 |
|
442 |
-
#
|
|
|
|
|
|
|
443 |
content_editor = gr.Textbox(
|
444 |
label="File Content",
|
445 |
placeholder="File content will be displayed here after loading...",
|
446 |
-
lines=
|
447 |
-
max_lines=
|
448 |
show_copy_button=True,
|
449 |
-
|
450 |
)
|
451 |
|
452 |
# Status information
|
453 |
status_output = gr.Textbox(
|
454 |
-
label="
|
455 |
interactive=False,
|
456 |
lines=2
|
457 |
)
|
458 |
|
459 |
-
|
460 |
-
|
461 |
-
"file_path": "",
|
462 |
-
"file_size": 0,
|
463 |
-
"current_pos": 0,
|
464 |
-
"chunk_size": 65536,
|
465 |
-
"content": ""
|
466 |
-
})
|
467 |
-
|
468 |
-
def load_file_info(file_path):
|
469 |
-
"""Load file information"""
|
470 |
if not file_path.strip():
|
471 |
-
return
|
472 |
|
473 |
-
|
474 |
-
|
475 |
-
|
476 |
-
info,
|
477 |
-
f"File: {info['filename']} | Size: {info['file_size_mb']} MB",
|
478 |
-
"File information loaded successfully",
|
479 |
-
gr.update(visible=True)
|
480 |
-
)
|
481 |
-
else:
|
482 |
-
return (
|
483 |
-
{},
|
484 |
-
f"Error: {info.get('error_message', 'Unknown error')}",
|
485 |
-
"Failed to load file information",
|
486 |
-
gr.update(visible=False)
|
487 |
-
)
|
488 |
-
|
489 |
-
def read_file_content(file_path, position, chunk_size):
|
490 |
-
"""Read file content"""
|
491 |
-
if not file_path.strip():
|
492 |
-
return "", 0, "No file selected", {
|
493 |
-
"file_path": "",
|
494 |
-
"file_size": 0,
|
495 |
-
"current_pos": 0,
|
496 |
-
"chunk_size": chunk_size,
|
497 |
-
"content": ""
|
498 |
-
}
|
499 |
-
|
500 |
-
result = asyncio.run(read_text_file_segments_tool(file_path, int(chunk_size), int(position)))
|
501 |
-
|
502 |
-
if result["status"] == "success":
|
503 |
-
new_state = {
|
504 |
-
"file_path": file_path,
|
505 |
-
"file_size": result["file_size"],
|
506 |
-
"current_pos": result["current_position"],
|
507 |
-
"chunk_size": chunk_size,
|
508 |
-
"content": result["content"]
|
509 |
-
}
|
510 |
|
511 |
-
|
512 |
-
f"
|
513 |
-
f"({result['current_position']}/{result['file_size']} bytes)\n"
|
514 |
-
f"Boundary type: {result.get('actual_boundary', 'Unknown')}\n"
|
515 |
-
f"{'End of file reached' if result['end_of_file_reached'] else 'More content available'}"
|
516 |
-
)
|
517 |
|
518 |
-
|
519 |
-
|
520 |
-
|
521 |
-
|
522 |
-
|
523 |
-
|
524 |
-
|
525 |
-
|
526 |
-
|
527 |
-
|
528 |
-
|
529 |
-
|
530 |
-
|
531 |
-
|
532 |
-
|
533 |
-
|
534 |
-
|
535 |
-
|
536 |
-
|
537 |
-
|
538 |
-
|
539 |
-
|
540 |
-
if not file_path.strip():
|
541 |
-
return "Please select a file first"
|
542 |
-
|
543 |
-
if not content.strip():
|
544 |
-
return "No content to save"
|
545 |
-
|
546 |
-
# Determine whether to use position parameter based on mode
|
547 |
-
write_pos = position if mode == "r+" else None
|
548 |
-
result = write_text_file_content(file_path, content, mode, write_pos)
|
549 |
-
|
550 |
-
if result["status"] == "success":
|
551 |
-
operation_info = f"Operation: {result.get('operation_type', mode)}"
|
552 |
-
size_info = f"Size change: {result.get('size_change', 0):+d} bytes"
|
553 |
-
return f"Save successful!\n{operation_info}\nWrote {result['characters_written']} characters\n{size_info}"
|
554 |
-
else:
|
555 |
-
return f"Save failed: {result.get('error_message', 'Unknown error')}"
|
556 |
-
|
557 |
-
def navigate_chunks(file_state, direction):
|
558 |
-
"""Navigate to previous or next chunk"""
|
559 |
-
if not file_state["file_path"]:
|
560 |
-
return file_state["current_pos"], "Please load a file first"
|
561 |
-
|
562 |
-
chunk_size = file_state["chunk_size"]
|
563 |
-
current_pos = file_state["current_pos"]
|
564 |
-
|
565 |
-
if direction == "prev":
|
566 |
-
new_pos = max(0, current_pos - chunk_size * 2) # Go back two chunks
|
567 |
-
elif direction == "next":
|
568 |
-
new_pos = current_pos # Next chunk starts from current position
|
569 |
-
else:
|
570 |
-
return current_pos, "Invalid navigation direction"
|
571 |
-
|
572 |
-
return new_pos, f"Navigated to position: {new_pos}"
|
573 |
|
574 |
-
# Bind event
|
575 |
load_file_btn.click(
|
576 |
-
|
577 |
inputs=[file_path_input],
|
578 |
-
outputs=[
|
579 |
-
).then(
|
580 |
-
read_file_content,
|
581 |
-
inputs=[file_path_input, current_position, chunk_size],
|
582 |
-
outputs=[content_editor, current_position, progress_display, file_state]
|
583 |
-
)
|
584 |
-
|
585 |
-
refresh_btn.click(
|
586 |
-
read_file_content,
|
587 |
-
inputs=[file_path_input, current_position, chunk_size],
|
588 |
-
outputs=[content_editor, current_position, progress_display, file_state]
|
589 |
-
)
|
590 |
-
|
591 |
-
# Control position input visibility when write mode changes
|
592 |
-
write_mode.change(
|
593 |
-
lambda mode: gr.update(visible=(mode == "r+")),
|
594 |
-
inputs=[write_mode],
|
595 |
-
outputs=[write_position]
|
596 |
-
)
|
597 |
-
|
598 |
-
save_file_btn.click(
|
599 |
-
save_file_content,
|
600 |
-
inputs=[file_path_input, content_editor, write_mode, write_position],
|
601 |
-
outputs=[status_output]
|
602 |
-
)
|
603 |
-
|
604 |
-
prev_chunk_btn.click(
|
605 |
-
lambda state: navigate_chunks(state, "prev"),
|
606 |
-
inputs=[file_state],
|
607 |
-
outputs=[current_position, status_output]
|
608 |
-
).then(
|
609 |
-
read_file_content,
|
610 |
-
inputs=[file_path_input, current_position, chunk_size],
|
611 |
-
outputs=[content_editor, current_position, progress_display, file_state]
|
612 |
-
)
|
613 |
-
|
614 |
-
next_chunk_btn.click(
|
615 |
-
lambda state: navigate_chunks(state, "next"),
|
616 |
-
inputs=[file_state],
|
617 |
-
outputs=[current_position, status_output]
|
618 |
-
).then(
|
619 |
-
read_file_content,
|
620 |
-
inputs=[file_path_input, current_position, chunk_size],
|
621 |
-
outputs=[content_editor, current_position, progress_display, file_state]
|
622 |
)
|
623 |
|
624 |
return demo
|
|
|
5 |
|
6 |
import gradio as gr
|
7 |
import asyncio
|
8 |
+
import os
|
9 |
from ..tools import mcp_tools
|
10 |
from ..tools.download_tools import get_file_info_tool, get_mp3_files_tool, read_text_file_segments_tool
|
11 |
from ..tools.transcription_tools import transcribe_audio_file_tool
|
|
|
12 |
|
13 |
def write_text_file_content(file_path: str, content: str, mode: str = "w", position: int = None):
|
14 |
"""Simple text file writing function"""
|
|
|
33 |
"error_message": str(e)
|
34 |
}
|
35 |
|
36 |
+
def temporarily_set_hf_token(hf_token: str):
    """Override ``HF_TOKEN`` in the process environment with a user-supplied token.

    The token is stripped of surrounding whitespace before it is stored. A
    ``None``, empty, or whitespace-only token leaves the environment untouched.

    Args:
        hf_token: Token entered by the user; may be ``None`` or blank.

    Returns:
        The value ``HF_TOKEN`` had before the call (``None`` if it was unset),
        so the caller can put it back afterwards.
    """
    original_token = os.environ.get("HF_TOKEN")
    cleaned_token = hf_token.strip() if hf_token else ""
    if cleaned_token:
        # os.environ is shared by the whole process, not just this request.
        os.environ["HF_TOKEN"] = cleaned_token
        # Never echo any part of the secret: stdout typically ends up in logs.
        print("π Using user-provided HF_TOKEN (value hidden)")
    return original_token
|
43 |
+
|
44 |
+
def restore_hf_token(original_token: "str | None"):
    """Put ``HF_TOKEN`` back to the value it had before a temporary override.

    Args:
        original_token: The previously captured value of ``HF_TOKEN``;
            ``None`` means the variable was not set beforehand.
    """
    if original_token is not None:
        os.environ["HF_TOKEN"] = original_token
    else:
        # The variable did not exist before the override, so remove it.
        # pop() with a default is a no-op when it is already absent.
        os.environ.pop("HF_TOKEN", None)
|
50 |
+
|
51 |
+
def get_default_directories():
    """Return candidate audio directories for the detected runtime environment.

    The environment is classified as Modal (``MODAL_ENVIRONMENT == "1"`` or a
    ``/modal`` path exists), Docker (``/.dockerenv`` exists), or local, and a
    matching list of directories is built. ``/tmp`` and ``.`` are always
    appended as generic fallbacks.

    Returns:
        A ``(directories, default_directory)`` tuple: the de-duplicated
        candidates in priority order, and the first of them.
    """
    import pathlib

    # Detect environment
    is_modal = os.environ.get("MODAL_ENVIRONMENT") == "1" or os.path.exists("/modal")
    is_docker = os.path.exists("/.dockerenv")
    current_dir = pathlib.Path.cwd()

    if is_modal:
        # Modal environment - use cache directories
        candidates = [
            "/root/cache/apple_podcasts",
            "/root/cache/xyz_podcasts",
            "/tmp/downloads",
        ]
    elif is_docker:
        # Docker environment
        candidates = [
            "/app/downloads",
            "/data/downloads",
            "/tmp/downloads",
        ]
    else:
        # Local environment - use current directory and common locations.
        # "~" is expanded here because file APIs treat it as a literal name.
        candidates = [
            str(current_dir / "downloads"),
            str(current_dir / "cache" / "apple_podcasts"),
            str(current_dir / "cache" / "xyz_podcasts"),
            os.path.expanduser("~/Downloads"),
            os.path.expanduser("~/Music"),
        ]

    # Add common directories
    candidates.extend(["/tmp", "."])

    # dict preserves insertion order, so this drops duplicates but keeps priority.
    unique_dirs = list(dict.fromkeys(candidates))

    # The list is never empty ("/tmp" and "." are always present).
    return unique_dirs, unique_dirs[0]
|
102 |
+
|
103 |
def create_gradio_interface():
|
104 |
"""Create Gradio interface
|
105 |
|
|
|
195 |
info="Identify different speakers (requires Hugging Face Token)"
|
196 |
)
|
197 |
|
198 |
+
# HF Token input for speaker diarization
|
199 |
+
hf_token_input_download = gr.Textbox(
|
200 |
+
label="Hugging Face Token (Optional)",
|
201 |
+
placeholder="Enter your HF token here to override environment variable",
|
202 |
+
type="password",
|
203 |
+
info="Required for speaker diarization. If provided, will override HF_TOKEN environment variable."
|
204 |
+
)
|
205 |
+
|
206 |
download_btn = gr.Button("π₯ Start Download", variant="primary")
|
207 |
result_output = gr.JSON(label="Download Results")
|
208 |
|
209 |
+
async def download_podcast_and_transcribe(url, platform, auto_transcribe, enable_speaker, hf_token):
    """Download a podcast episode and, if requested, transcribe it.

    Args:
        url: Episode URL passed to the download tool.
        platform: ``"Apple Podcast"`` selects the Apple downloader; any other
            value selects the XYZ downloader.
        auto_transcribe: When falsy, only the download result is returned.
        enable_speaker: Forwarded as ``enable_speaker_diarization``.
        hf_token: Optional token that temporarily overrides ``HF_TOKEN``.

    Returns:
        A dict with ``download_status`` and ``transcription_status`` plus,
        depending on the outcome, the audio path, transcript paths,
        transcription details, speaker info, or an ``error_message``.
    """
    # NOTE(review): the token is swapped in the process-wide environment, so
    # overlapping requests could observe each other's token — confirm whether
    # handlers can run concurrently.
    original_token = temporarily_set_hf_token(hf_token)

    try:
        # 1. Download with the tool matching the selected platform
        if platform == "Apple Podcast":
            download_result = await mcp_tools.download_apple_podcast(url)
        else:
            download_result = await mcp_tools.download_xyz_podcast(url)

        # 2. Check if download was successful
        if download_result["status"] != "success":
            return {
                "download_status": "failed",
                "error_message": download_result.get("error_message", "Download failed"),
                "transcription_status": "not_started"
            }

        # 3. If not auto-transcribing, return only download results
        if not auto_transcribe:
            return {
                "download_status": "success",
                "audio_file": download_result["audio_file_path"],
                "transcription_status": "skipped (user chose not to auto-transcribe)"
            }

        # 4. Start transcription
        try:
            audio_path = download_result["audio_file_path"]
            print(f"Transcribing audio file: {audio_path}")
            transcribe_result = await mcp_tools.transcribe_audio_file(
                audio_path,
                model_size="turbo",
                language=None,
                output_format="srt",
                enable_speaker_diarization=enable_speaker
            )

            # The tool may report a failure in its payload instead of raising.
            # perform_transcription uses processing_status == "success" as the
            # success marker; a missing key is treated as success here so that
            # payloads without the field keep their previous handling.
            # NOTE(review): confirm mcp_tools.transcribe_audio_file returns
            # the same payload shape as transcribe_audio_file_tool.
            if transcribe_result.get("processing_status", "success") != "success":
                return {
                    "download_status": "success",
                    "audio_file": audio_path,
                    "transcription_status": "failed",
                    "error_message": transcribe_result.get("error_message", "Transcription failed")
                }

            # 5. Merge results
            result = {
                "download_status": "success",
                "audio_file": audio_path,
                "transcription_status": "success",
                "txt_file_path": transcribe_result.get("txt_file_path"),
                "srt_file_path": transcribe_result.get("srt_file_path"),
                "transcription_details": {
                    "model_used": transcribe_result.get("model_used"),
                    "segment_count": transcribe_result.get("segment_count"),
                    "audio_duration": transcribe_result.get("audio_duration"),
                    "saved_files": transcribe_result.get("saved_files", []),
                    "speaker_diarization_enabled": transcribe_result.get("speaker_diarization_enabled", False)
                }
            }

            # 6. Add speaker diarization info if enabled
            if enable_speaker and transcribe_result.get("speaker_diarization_enabled", False):
                result["speaker_diarization"] = {
                    "global_speaker_count": transcribe_result.get("global_speaker_count", 0),
                    "speaker_summary": transcribe_result.get("speaker_summary", {})
                }

            return result

        except Exception as e:
            # The download itself succeeded; surface the transcription error
            # in the result instead of failing the whole request.
            return {
                "download_status": "success",
                "audio_file": download_result["audio_file_path"],
                "transcription_status": "failed",
                "error_message": str(e)
            }

    finally:
        # Restore original HF_TOKEN
        restore_hf_token(original_token)
|
284 |
|
285 |
# Bind callback function
|
286 |
download_btn.click(
|
287 |
download_podcast_and_transcribe,
|
288 |
+
inputs=[url_input, platform_choice, auto_transcribe, enable_speaker_diarization, hf_token_input_download],
|
289 |
outputs=result_output
|
290 |
)
|
291 |
|
|
|
319 |
)
|
320 |
|
321 |
with gr.Row():
|
322 |
+
with gr.Column():
|
323 |
+
output_format_choice = gr.Radio(
|
324 |
+
choices=["srt", "txt", "json"],
|
325 |
+
value="srt",
|
326 |
+
label="Output Format"
|
327 |
+
)
|
328 |
+
with gr.Column():
|
329 |
+
enable_speaker_separation = gr.Checkbox(
|
330 |
+
label="Enable speaker diarization",
|
331 |
+
value=False,
|
332 |
+
info="Requires Hugging Face Token"
|
333 |
+
)
|
334 |
+
# HF Token input for speaker diarization
|
335 |
+
hf_token_input_transcribe = gr.Textbox(
|
336 |
+
label="Hugging Face Token (Optional)",
|
337 |
+
placeholder="Enter your HF token here to override environment variable",
|
338 |
+
type="password",
|
339 |
+
info="Required for speaker diarization. If provided, will override HF_TOKEN environment variable."
|
340 |
+
)
|
341 |
|
342 |
transcribe_btn = gr.Button("π€ Start Transcription", variant="primary", size="lg")
|
343 |
|
|
|
365 |
visible=False
|
366 |
)
|
367 |
|
368 |
+
def perform_transcription(audio_path, model_size, language, output_format, enable_speaker, hf_token):
|
369 |
"""Execute audio transcription"""
|
370 |
if not audio_path.strip():
|
371 |
return {
|
372 |
"error": "Please enter audio file path"
|
373 |
}, "Transcription failed: No audio file selected", gr.update(visible=False)
|
374 |
|
375 |
+
# Temporarily set HF_TOKEN if provided
|
376 |
+
original_token = temporarily_set_hf_token(hf_token)
|
|
|
|
|
|
|
|
|
|
|
377 |
|
378 |
try:
|
379 |
+
# Check if file exists
|
380 |
+
import asyncio
|
381 |
+
file_info = asyncio.run(get_file_info_tool(audio_path))
|
382 |
+
if file_info["status"] != "success":
|
383 |
+
return {
|
384 |
+
"error": f"File does not exist or cannot be accessed: {file_info.get('error_message', 'Unknown error')}"
|
385 |
+
}, "Transcription failed: File inaccessible", gr.update(visible=False)
|
386 |
|
387 |
+
try:
|
388 |
+
# Process language parameter
|
389 |
+
lang = None if language == "auto" else language
|
390 |
+
|
391 |
+
# Call transcription tool
|
392 |
+
result = asyncio.run(transcribe_audio_file_tool(
|
393 |
+
audio_file_path=audio_path,
|
394 |
+
model_size=model_size,
|
395 |
+
language=lang,
|
396 |
+
output_format=output_format,
|
397 |
+
enable_speaker_diarization=enable_speaker
|
398 |
+
))
|
399 |
+
|
400 |
+
# Prepare status information
|
401 |
+
if result.get("processing_status") == "success":
|
402 |
+
status_text = f"""β
Transcription completed!
|
403 |
π Generated files: {len(result.get('saved_files', []))} files
|
404 |
π΅ Audio duration: {result.get('audio_duration', 0):.2f} seconds
|
405 |
π Transcription segments: {result.get('segment_count', 0)} segments
|
406 |
π― Model used: {result.get('model_used', 'N/A')}
|
407 |
π Speaker diarization: {'Enabled' if result.get('speaker_diarization_enabled', False) else 'Disabled'}"""
|
408 |
+
|
409 |
+
# Show speaker information
|
410 |
+
speaker_visible = result.get('speaker_diarization_enabled', False) and result.get('global_speaker_count', 0) > 0
|
411 |
+
speaker_info = result.get('speaker_summary', {}) if speaker_visible else {}
|
412 |
+
|
413 |
+
return result, status_text, gr.update(visible=speaker_visible, value=speaker_info)
|
414 |
+
else:
|
415 |
+
error_msg = result.get('error_message', 'Unknown error')
|
416 |
+
return result, f"β Transcription failed: {error_msg}", gr.update(visible=False)
|
417 |
+
|
418 |
+
except Exception as e:
|
419 |
+
return {
|
420 |
+
"error": f"Exception occurred during transcription: {str(e)}"
|
421 |
+
}, f"β Transcription exception: {str(e)}", gr.update(visible=False)
|
422 |
+
|
423 |
+
finally:
|
424 |
+
# Restore original HF_TOKEN
|
425 |
+
restore_hf_token(original_token)
|
426 |
|
427 |
# Bind transcription button
|
428 |
transcribe_btn.click(
|
|
|
432 |
model_size_choice,
|
433 |
language_choice,
|
434 |
output_format_choice,
|
435 |
+
enable_speaker_separation,
|
436 |
+
hf_token_input_transcribe
|
437 |
],
|
438 |
outputs=[
|
439 |
transcribe_result_output,
|
|
|
446 |
with gr.Tab("MP3 File Management"):
|
447 |
gr.Markdown("### π΅ MP3 File Management")
|
448 |
|
449 |
+
# Get environment-specific directories
|
450 |
+
available_dirs, default_dir = get_default_directories()
|
451 |
+
|
452 |
+
# Display environment info
|
453 |
+
import pathlib
|
454 |
+
is_modal = os.environ.get("MODAL_ENVIRONMENT") == "1" or os.path.exists("/modal")
|
455 |
+
is_docker = os.path.exists("/.dockerenv")
|
456 |
+
current_dir = pathlib.Path.cwd()
|
457 |
+
|
458 |
+
if is_modal:
|
459 |
+
env_info = "π **Modal Environment Detected** - Using Modal cache directories"
|
460 |
+
elif is_docker:
|
461 |
+
env_info = "π³ **Docker Environment Detected** - Using container directories"
|
462 |
+
else:
|
463 |
+
env_info = f"π» **Local Environment Detected** - Using current directory: `{current_dir}`"
|
464 |
+
|
465 |
+
gr.Markdown(env_info)
|
466 |
+
|
467 |
+
with gr.Row():
|
468 |
+
with gr.Column(scale=3):
|
469 |
+
# Flexible directory path input
|
470 |
+
custom_dir_input = gr.Textbox(
|
471 |
+
label="Custom Directory Path",
|
472 |
+
placeholder="Enter custom directory path (e.g., /path/to/your/audio/files)",
|
473 |
+
lines=1,
|
474 |
+
value=default_dir
|
475 |
+
)
|
476 |
+
with gr.Column(scale=2):
|
477 |
+
# Quick select for environment-specific directories
|
478 |
+
quick_select = gr.Dropdown(
|
479 |
+
label="Quick Select",
|
480 |
+
choices=available_dirs,
|
481 |
+
value=default_dir,
|
482 |
+
info="Select directories based on current environment"
|
483 |
+
)
|
484 |
+
with gr.Column(scale=1):
|
485 |
+
scan_btn = gr.Button("π Scan Directory", variant="primary")
|
486 |
|
487 |
file_list = gr.Textbox(
|
488 |
label="MP3 File List",
|
|
|
495 |
|
496 |
def list_mp3_files(directory):
|
497 |
"""List MP3 files in directory"""
|
498 |
+
if not directory or not directory.strip():
|
499 |
+
return "Please enter a directory path"
|
500 |
+
|
501 |
+
try:
|
502 |
+
result = asyncio.run(get_mp3_files_tool(directory.strip()))
|
503 |
+
|
504 |
+
# Check if there's an error
|
505 |
+
if "error_message" in result:
|
506 |
+
return f"β Error scanning directory: {result['error_message']}"
|
507 |
+
|
508 |
+
# Get file list
|
509 |
+
total_files = result.get('total_files', 0)
|
510 |
+
file_list = result.get('file_list', [])
|
511 |
+
scanned_directory = result.get('scanned_directory', directory)
|
512 |
+
|
513 |
+
if total_files == 0:
|
514 |
+
return f"π No MP3 files found in: {scanned_directory}"
|
515 |
+
|
516 |
+
# Format file list for display
|
517 |
+
display_lines = [
|
518 |
+
f"π Found {total_files} MP3 file{'s' if total_files != 1 else ''} in: {scanned_directory}",
|
519 |
+
"=" * 60
|
520 |
+
]
|
521 |
+
|
522 |
+
for i, file_info in enumerate(file_list, 1):
|
523 |
+
filename = file_info.get('filename', 'Unknown')
|
524 |
+
size_mb = file_info.get('file_size_mb', 0)
|
525 |
+
created_time = file_info.get('created_time', 'Unknown')
|
526 |
+
full_path = file_info.get('full_path', 'Unknown')
|
527 |
+
|
528 |
+
display_lines.append(
|
529 |
+
f"{i:2d}. π {filename}\n"
|
530 |
+
f" πΎ Size: {size_mb:.2f} MB\n"
|
531 |
+
f" π
Created: {created_time}\n"
|
532 |
+
f" π Path: {full_path}"
|
533 |
+
)
|
534 |
+
|
535 |
+
return "\n".join(display_lines)
|
536 |
+
|
537 |
+
except Exception as e:
|
538 |
+
return f"β Exception occurred while scanning directory: {str(e)}"
|
539 |
|
540 |
+
def use_quick_select(selected_path):
    """Copy the quick-select choice into the path box and scan it right away.

    Returns a ``(path, listing)`` pair; both are empty strings when nothing
    is selected.
    """
    if not selected_path:
        return "", ""
    listing = list_mp3_files(selected_path)
    return selected_path, listing
|
545 |
+
|
546 |
+
def scan_directory(custom_path, quick_path):
    """Scan the custom directory if one was typed, else the quick-select one.

    Args:
        custom_path: Text from the custom directory box; ``None`` or blank
            means "not provided".
        quick_path: Currently selected quick-select directory.

    Returns:
        The formatted listing (or error text) produced by ``list_mp3_files``.
    """
    # Tolerate None as well as blank text so a cleared input falls back to
    # the quick-select value instead of raising AttributeError.
    typed_path = (custom_path or "").strip()
    directory = typed_path or quick_path
    return list_mp3_files(directory)
|
550 |
+
|
551 |
+
# Bind callback functions
|
552 |
+
quick_select.change(
|
553 |
+
use_quick_select,
|
554 |
+
inputs=[quick_select],
|
555 |
+
outputs=[custom_dir_input, file_list]
|
556 |
+
)
|
557 |
+
|
558 |
+
scan_btn.click(
|
559 |
+
scan_directory,
|
560 |
+
inputs=[custom_dir_input, quick_select],
|
561 |
+
outputs=[file_list]
|
562 |
+
)
|
563 |
+
|
564 |
+
# Auto-scan when custom directory is entered
|
565 |
+
custom_dir_input.change(
|
566 |
+
lambda x: list_mp3_files(x) if x.strip() else "",
|
567 |
+
inputs=[custom_dir_input],
|
568 |
outputs=[file_list]
|
569 |
)
|
570 |
|
571 |
# ==================== Transcription Text Management Tab ====================
|
572 |
with gr.Tab("Transcription Text Management"):
|
573 |
gr.Markdown("### π Transcription Text File Management")
|
574 |
+
gr.Markdown("View TXT and SRT files generated from audio transcription")
|
575 |
|
576 |
+
# File path input
|
577 |
+
file_path_input = gr.Textbox(
|
578 |
+
label="File Path",
|
579 |
+
placeholder="Enter path to TXT or SRT file to read",
|
580 |
+
lines=1
|
581 |
+
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
582 |
|
583 |
+
# Load button
|
584 |
+
load_file_btn = gr.Button("π Load File", variant="primary")
|
585 |
+
|
586 |
+
# Text content viewer
|
587 |
content_editor = gr.Textbox(
|
588 |
label="File Content",
|
589 |
placeholder="File content will be displayed here after loading...",
|
590 |
+
lines=25,
|
591 |
+
max_lines=40,
|
592 |
show_copy_button=True,
|
593 |
+
interactive=False
|
594 |
)
|
595 |
|
596 |
# Status information
|
597 |
status_output = gr.Textbox(
|
598 |
+
label="Status",
|
599 |
interactive=False,
|
600 |
lines=2
|
601 |
)
|
602 |
|
603 |
+
def load_and_display_file(file_path):
|
604 |
+
"""Load and display complete file content"""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
605 |
if not file_path.strip():
|
606 |
+
return "Please enter a file path", "β No file path provided"
|
607 |
|
608 |
+
try:
|
609 |
+
# Get file info first
|
610 |
+
info = asyncio.run(get_file_info_tool(file_path))
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
611 |
|
612 |
+
if info["status"] != "success":
|
613 |
+
return "", f"β Error: {info.get('error_message', 'Unknown error')}"
|
|
|
|
|
|
|
|
|
614 |
|
615 |
+
# Check file size (warn for very large files)
|
616 |
+
file_size_mb = info.get('file_size_mb', 0)
|
617 |
+
if file_size_mb > 10: # Warn for files larger than 10MB
|
618 |
+
return "", f"β οΈ File is too large ({file_size_mb:.2f} MB). Please use a smaller file for viewing."
|
619 |
+
|
620 |
+
# Read entire file content
|
621 |
+
with open(file_path, 'r', encoding='utf-8') as f:
|
622 |
+
content = f.read()
|
623 |
+
|
624 |
+
# Status message
|
625 |
+
status = f"β
File loaded successfully: {info.get('filename', 'Unknown')}\nπ Size: {file_size_mb:.2f} MB"
|
626 |
+
|
627 |
+
return content, status
|
628 |
+
|
629 |
+
except UnicodeDecodeError:
|
630 |
+
return "", "β Error: File contains non-text content or encoding is not UTF-8"
|
631 |
+
except FileNotFoundError:
|
632 |
+
return "", "β Error: File not found"
|
633 |
+
except PermissionError:
|
634 |
+
return "", "β Error: Permission denied to read file"
|
635 |
+
except Exception as e:
|
636 |
+
return "", f"β Error: {str(e)}"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
637 |
|
638 |
+
# Bind event handler
|
639 |
load_file_btn.click(
|
640 |
+
load_and_display_file,
|
641 |
inputs=[file_path_input],
|
642 |
+
outputs=[content_editor, status_output]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
643 |
)
|
644 |
|
645 |
return demo
|