richard-su committed on
Commit
e98f763
·
verified ·
1 Parent(s): 4bbc337

Upload folder using huggingface_hub

Browse files
src/ui/__pycache__/gradio_ui.cpython-310.pyc CHANGED
Binary files a/src/ui/__pycache__/gradio_ui.cpython-310.pyc and b/src/ui/__pycache__/gradio_ui.cpython-310.pyc differ
 
src/ui/gradio_ui.py CHANGED
@@ -5,10 +5,10 @@ Contains all UI components and interface logic
5
 
6
  import gradio as gr
7
  import asyncio
 
8
  from ..tools import mcp_tools
9
  from ..tools.download_tools import get_file_info_tool, get_mp3_files_tool, read_text_file_segments_tool
10
  from ..tools.transcription_tools import transcribe_audio_file_tool
11
- import os
12
 
13
  def write_text_file_content(file_path: str, content: str, mode: str = "w", position: int = None):
14
  """Simple text file writing function"""
@@ -33,6 +33,73 @@ def write_text_file_content(file_path: str, content: str, mode: str = "w", posit
33
  "error_message": str(e)
34
  }
35
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
36
  def create_gradio_interface():
37
  """Create Gradio interface
38
 
@@ -128,81 +195,97 @@ def create_gradio_interface():
128
  info="Identify different speakers (requires Hugging Face Token)"
129
  )
130
 
 
 
 
 
 
 
 
 
131
  download_btn = gr.Button("πŸ“₯ Start Download", variant="primary")
132
  result_output = gr.JSON(label="Download Results")
133
 
134
- async def download_podcast_and_transcribe(url, platform, auto_transcribe, enable_speaker):
135
  """Call corresponding download tool based on selected platform"""
136
- if platform == "Apple Podcast":
137
- download_result = await mcp_tools.download_apple_podcast(url)
138
- else:
139
- download_result = await mcp_tools.download_xyz_podcast(url)
140
-
141
- # 2. Check if download was successful
142
- if download_result["status"] != "success":
143
- return {
144
- "download_status": "failed",
145
- "error_message": download_result.get("error_message", "Download failed"),
146
- "transcription_status": "not_started"
147
- }
148
-
149
- # 3. If not auto-transcribing, return only download results
150
- if not auto_transcribe:
151
- return {
152
- "download_status": "success",
153
- "audio_file": download_result["audio_file_path"],
154
- "transcription_status": "skipped (user chose not to auto-transcribe)"
155
- }
156
 
157
- # 4. Start transcription
158
  try:
159
- audio_path = download_result["audio_file_path"]
160
- print(f"Transcribing audio file: {audio_path}")
161
- transcribe_result = await mcp_tools.transcribe_audio_file(
162
- audio_path,
163
- model_size="turbo",
164
- language=None,
165
- output_format="srt",
166
- enable_speaker_diarization=enable_speaker
167
- )
168
 
169
- # 5. Merge results
170
- result = {
171
- "download_status": "success",
172
- "audio_file": audio_path,
173
- "transcription_status": "success",
174
- "txt_file_path": transcribe_result.get("txt_file_path"),
175
- "srt_file_path": transcribe_result.get("srt_file_path"),
176
- "transcription_details": {
177
- "model_used": transcribe_result.get("model_used"),
178
- "segment_count": transcribe_result.get("segment_count"),
179
- "audio_duration": transcribe_result.get("audio_duration"),
180
- "saved_files": transcribe_result.get("saved_files", []),
181
- "speaker_diarization_enabled": transcribe_result.get("speaker_diarization_enabled", False)
182
  }
183
- }
184
 
185
- # 6. Add speaker diarization info if enabled
186
- if enable_speaker and transcribe_result.get("speaker_diarization_enabled", False):
187
- result["speaker_diarization"] = {
188
- "global_speaker_count": transcribe_result.get("global_speaker_count", 0),
189
- "speaker_summary": transcribe_result.get("speaker_summary", {})
 
190
  }
191
 
192
- return result
193
-
194
- except Exception as e:
195
- return {
196
- "download_status": "success",
197
- "audio_file": download_result["audio_file_path"],
198
- "transcription_status": "failed",
199
- "error_message": str(e)
200
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
201
 
202
  # Bind callback function
203
  download_btn.click(
204
  download_podcast_and_transcribe,
205
- inputs=[url_input, platform_choice, auto_transcribe, enable_speaker_diarization],
206
  outputs=result_output
207
  )
208
 
@@ -236,16 +319,25 @@ def create_gradio_interface():
236
  )
237
 
238
  with gr.Row():
239
- output_format_choice = gr.Radio(
240
- choices=["srt", "txt", "json"],
241
- value="srt",
242
- label="Output Format"
243
- )
244
- enable_speaker_separation = gr.Checkbox(
245
- label="Enable speaker diarization",
246
- value=False,
247
- info="Requires Hugging Face Token"
248
- )
 
 
 
 
 
 
 
 
 
249
 
250
  transcribe_btn = gr.Button("🎀 Start Transcription", variant="primary", size="lg")
251
 
@@ -273,56 +365,64 @@ def create_gradio_interface():
273
  visible=False
274
  )
275
 
276
- def perform_transcription(audio_path, model_size, language, output_format, enable_speaker):
277
  """Execute audio transcription"""
278
  if not audio_path.strip():
279
  return {
280
  "error": "Please enter audio file path"
281
  }, "Transcription failed: No audio file selected", gr.update(visible=False)
282
 
283
- # Check if file exists
284
- import asyncio
285
- file_info = asyncio.run(get_file_info_tool(audio_path))
286
- if file_info["status"] != "success":
287
- return {
288
- "error": f"File does not exist or cannot be accessed: {file_info.get('error_message', 'Unknown error')}"
289
- }, "Transcription failed: File inaccessible", gr.update(visible=False)
290
 
291
  try:
292
- # Process language parameter
293
- lang = None if language == "auto" else language
 
 
 
 
 
294
 
295
- # Call transcription tool
296
- result = asyncio.run(transcribe_audio_file_tool(
297
- audio_file_path=audio_path,
298
- model_size=model_size,
299
- language=lang,
300
- output_format=output_format,
301
- enable_speaker_diarization=enable_speaker
302
- ))
303
-
304
- # Prepare status information
305
- if result.get("processing_status") == "success":
306
- status_text = f"""βœ… Transcription completed!
 
 
 
 
307
  πŸ“ Generated files: {len(result.get('saved_files', []))} files
308
  🎡 Audio duration: {result.get('audio_duration', 0):.2f} seconds
309
  πŸ“ Transcription segments: {result.get('segment_count', 0)} segments
310
  🎯 Model used: {result.get('model_used', 'N/A')}
311
  🎭 Speaker diarization: {'Enabled' if result.get('speaker_diarization_enabled', False) else 'Disabled'}"""
312
-
313
- # Show speaker information
314
- speaker_visible = result.get('speaker_diarization_enabled', False) and result.get('global_speaker_count', 0) > 0
315
- speaker_info = result.get('speaker_summary', {}) if speaker_visible else {}
316
-
317
- return result, status_text, gr.update(visible=speaker_visible, value=speaker_info)
318
- else:
319
- error_msg = result.get('error_message', 'Unknown error')
320
- return result, f"❌ Transcription failed: {error_msg}", gr.update(visible=False)
321
-
322
- except Exception as e:
323
- return {
324
- "error": f"Exception occurred during transcription: {str(e)}"
325
- }, f"❌ Transcription exception: {str(e)}", gr.update(visible=False)
 
 
 
 
326
 
327
  # Bind transcription button
328
  transcribe_btn.click(
@@ -332,7 +432,8 @@ def create_gradio_interface():
332
  model_size_choice,
333
  language_choice,
334
  output_format_choice,
335
- enable_speaker_separation
 
336
  ],
337
  outputs=[
338
  transcribe_result_output,
@@ -345,14 +446,43 @@ def create_gradio_interface():
345
  with gr.Tab("MP3 File Management"):
346
  gr.Markdown("### 🎡 MP3 File Management")
347
 
348
- dir_input = gr.Dropdown(
349
- label="Directory Path",
350
- choices=[
351
- "/root/cache/apple_podcasts",
352
- "/root/cache/xyz_podcasts"
353
- ],
354
- value="/root/cache/apple_podcasts"
355
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
356
 
357
  file_list = gr.Textbox(
358
  label="MP3 File List",
@@ -365,260 +495,151 @@ def create_gradio_interface():
365
 
366
  def list_mp3_files(directory):
367
  """List MP3 files in directory"""
368
- files = asyncio.run(get_mp3_files_tool(directory))
369
- return "\n".join(files) if files else "No MP3 files found in directory"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
370
 
371
- # Bind callback function
372
- dir_input.change(
373
- list_mp3_files,
374
- inputs=[dir_input],
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
375
  outputs=[file_list]
376
  )
377
 
378
  # ==================== Transcription Text Management Tab ====================
379
  with gr.Tab("Transcription Text Management"):
380
  gr.Markdown("### πŸ“ Transcription Text File Management")
381
- gr.Markdown("Manage and edit TXT and SRT files generated from audio transcription")
382
 
383
- with gr.Row():
384
- with gr.Column(scale=2):
385
- # File path input
386
- file_path_input = gr.Textbox(
387
- label="File Path",
388
- placeholder="Enter path to TXT or SRT file to read",
389
- lines=1
390
- )
391
-
392
- # File information display
393
- file_info_output = gr.JSON(label="File Information", visible=False)
394
-
395
- with gr.Row():
396
- load_file_btn = gr.Button("πŸ“‚ Load File", variant="secondary")
397
- save_file_btn = gr.Button("πŸ’Ύ Save File", variant="primary")
398
- refresh_btn = gr.Button("πŸ”„ Refresh", variant="secondary")
399
-
400
- with gr.Column(scale=1):
401
- # Read control
402
- gr.Markdown("#### πŸ“– Segmented Reading Control")
403
- current_position = gr.Number(
404
- label="Current Position (bytes)",
405
- value=0,
406
- minimum=0
407
- )
408
- chunk_size = gr.Number(
409
- label="Chunk Size (bytes)",
410
- value=65536, # 64KB
411
- minimum=1024,
412
- maximum=1048576 # Max 1MB
413
- )
414
-
415
- with gr.Row():
416
- prev_chunk_btn = gr.Button("⬅️ Previous", size="sm")
417
- next_chunk_btn = gr.Button("➑️ Next", size="sm")
418
-
419
- # Progress display
420
- progress_display = gr.Textbox(
421
- label="Reading Progress",
422
- value="No file loaded",
423
- interactive=False,
424
- lines=3
425
- )
426
-
427
- # Write control
428
- gr.Markdown("#### ✏️ Write Control")
429
- write_mode = gr.Radio(
430
- choices=["w", "a", "r+"],
431
- value="w",
432
- label="Write Mode",
433
- info="w=overwrite, a=append, r+=position"
434
- )
435
- write_position = gr.Number(
436
- label="Write Position (bytes)",
437
- value=0,
438
- minimum=0,
439
- visible=False
440
- )
441
 
442
- # Text content editor
 
 
 
443
  content_editor = gr.Textbox(
444
  label="File Content",
445
  placeholder="File content will be displayed here after loading...",
446
- lines=20,
447
- max_lines=30,
448
  show_copy_button=True,
449
- autoscroll=False
450
  )
451
 
452
  # Status information
453
  status_output = gr.Textbox(
454
- label="Operation Status",
455
  interactive=False,
456
  lines=2
457
  )
458
 
459
- # Internal state variables
460
- file_state = gr.State({
461
- "file_path": "",
462
- "file_size": 0,
463
- "current_pos": 0,
464
- "chunk_size": 65536,
465
- "content": ""
466
- })
467
-
468
- def load_file_info(file_path):
469
- """Load file information"""
470
  if not file_path.strip():
471
- return {}, "Please enter file path", "No file selected", gr.update(visible=False)
472
 
473
- info = asyncio.run(get_file_info_tool(file_path))
474
- if info["status"] == "success":
475
- return (
476
- info,
477
- f"File: {info['filename']} | Size: {info['file_size_mb']} MB",
478
- "File information loaded successfully",
479
- gr.update(visible=True)
480
- )
481
- else:
482
- return (
483
- {},
484
- f"Error: {info.get('error_message', 'Unknown error')}",
485
- "Failed to load file information",
486
- gr.update(visible=False)
487
- )
488
-
489
- def read_file_content(file_path, position, chunk_size):
490
- """Read file content"""
491
- if not file_path.strip():
492
- return "", 0, "No file selected", {
493
- "file_path": "",
494
- "file_size": 0,
495
- "current_pos": 0,
496
- "chunk_size": chunk_size,
497
- "content": ""
498
- }
499
-
500
- result = asyncio.run(read_text_file_segments_tool(file_path, int(chunk_size), int(position)))
501
-
502
- if result["status"] == "success":
503
- new_state = {
504
- "file_path": file_path,
505
- "file_size": result["file_size"],
506
- "current_pos": result["current_position"],
507
- "chunk_size": chunk_size,
508
- "content": result["content"]
509
- }
510
 
511
- progress_text = (
512
- f"Progress: {result['progress_percentage']:.1f}% "
513
- f"({result['current_position']}/{result['file_size']} bytes)\n"
514
- f"Boundary type: {result.get('actual_boundary', 'Unknown')}\n"
515
- f"{'End of file reached' if result['end_of_file_reached'] else 'More content available'}"
516
- )
517
 
518
- return (
519
- result["content"],
520
- result["current_position"],
521
- progress_text,
522
- new_state
523
- )
524
- else:
525
- return (
526
- "",
527
- position,
528
- f"Read failed: {result.get('error_message', 'Unknown error')}",
529
- {
530
- "file_path": file_path,
531
- "file_size": 0,
532
- "current_pos": position,
533
- "chunk_size": chunk_size,
534
- "content": ""
535
- }
536
- )
537
-
538
- def save_file_content(file_path, content, mode, position):
539
- """Save file content"""
540
- if not file_path.strip():
541
- return "Please select a file first"
542
-
543
- if not content.strip():
544
- return "No content to save"
545
-
546
- # Determine whether to use position parameter based on mode
547
- write_pos = position if mode == "r+" else None
548
- result = write_text_file_content(file_path, content, mode, write_pos)
549
-
550
- if result["status"] == "success":
551
- operation_info = f"Operation: {result.get('operation_type', mode)}"
552
- size_info = f"Size change: {result.get('size_change', 0):+d} bytes"
553
- return f"Save successful!\n{operation_info}\nWrote {result['characters_written']} characters\n{size_info}"
554
- else:
555
- return f"Save failed: {result.get('error_message', 'Unknown error')}"
556
-
557
- def navigate_chunks(file_state, direction):
558
- """Navigate to previous or next chunk"""
559
- if not file_state["file_path"]:
560
- return file_state["current_pos"], "Please load a file first"
561
-
562
- chunk_size = file_state["chunk_size"]
563
- current_pos = file_state["current_pos"]
564
-
565
- if direction == "prev":
566
- new_pos = max(0, current_pos - chunk_size * 2) # Go back two chunks
567
- elif direction == "next":
568
- new_pos = current_pos # Next chunk starts from current position
569
- else:
570
- return current_pos, "Invalid navigation direction"
571
-
572
- return new_pos, f"Navigated to position: {new_pos}"
573
 
574
- # Bind event handlers
575
  load_file_btn.click(
576
- load_file_info,
577
  inputs=[file_path_input],
578
- outputs=[file_info_output, progress_display, status_output, file_info_output]
579
- ).then(
580
- read_file_content,
581
- inputs=[file_path_input, current_position, chunk_size],
582
- outputs=[content_editor, current_position, progress_display, file_state]
583
- )
584
-
585
- refresh_btn.click(
586
- read_file_content,
587
- inputs=[file_path_input, current_position, chunk_size],
588
- outputs=[content_editor, current_position, progress_display, file_state]
589
- )
590
-
591
- # Control position input visibility when write mode changes
592
- write_mode.change(
593
- lambda mode: gr.update(visible=(mode == "r+")),
594
- inputs=[write_mode],
595
- outputs=[write_position]
596
- )
597
-
598
- save_file_btn.click(
599
- save_file_content,
600
- inputs=[file_path_input, content_editor, write_mode, write_position],
601
- outputs=[status_output]
602
- )
603
-
604
- prev_chunk_btn.click(
605
- lambda state: navigate_chunks(state, "prev"),
606
- inputs=[file_state],
607
- outputs=[current_position, status_output]
608
- ).then(
609
- read_file_content,
610
- inputs=[file_path_input, current_position, chunk_size],
611
- outputs=[content_editor, current_position, progress_display, file_state]
612
- )
613
-
614
- next_chunk_btn.click(
615
- lambda state: navigate_chunks(state, "next"),
616
- inputs=[file_state],
617
- outputs=[current_position, status_output]
618
- ).then(
619
- read_file_content,
620
- inputs=[file_path_input, current_position, chunk_size],
621
- outputs=[content_editor, current_position, progress_display, file_state]
622
  )
623
 
624
  return demo
 
5
 
6
  import gradio as gr
7
  import asyncio
8
+ import os
9
  from ..tools import mcp_tools
10
  from ..tools.download_tools import get_file_info_tool, get_mp3_files_tool, read_text_file_segments_tool
11
  from ..tools.transcription_tools import transcribe_audio_file_tool
 
12
 
13
  def write_text_file_content(file_path: str, content: str, mode: str = "w", position: int = None):
14
  """Simple text file writing function"""
 
33
  "error_message": str(e)
34
  }
35
 
36
def temporarily_set_hf_token(hf_token: str):
    """Temporarily override HF_TOKEN in the process environment.

    Args:
        hf_token: Token supplied by the user. ``None``, an empty string, or a
            whitespace-only string leaves the environment untouched.

    Returns:
        The value ``HF_TOKEN`` had before this call (``None`` if it was not
        set). Pass it to ``restore_hf_token`` once the work is done.
    """
    original_token = os.environ.get("HF_TOKEN")

    # Strip once so the stored value and the emptiness check always agree.
    cleaned_token = hf_token.strip() if hf_token else ""
    if cleaned_token:
        # NOTE: os.environ is process-wide, so the override is visible to
        # everything running in this process until it is restored.
        os.environ["HF_TOKEN"] = cleaned_token
        # Never echo any part of the secret: stdout typically ends up in logs.
        print("🔑 Using user-provided HF_TOKEN")

    return original_token
43
+
44
+ def restore_hf_token(original_token: str):
45
+ """Restore original HF_TOKEN in environment"""
46
+ if original_token is not None:
47
+ os.environ["HF_TOKEN"] = original_token
48
+ elif "HF_TOKEN" in os.environ:
49
+ del os.environ["HF_TOKEN"]
50
+
51
def get_default_directories():
    """Return candidate audio directories for the current runtime environment.

    The environment is classified as Modal (``MODAL_ENVIRONMENT == "1"`` or a
    ``/modal`` path exists), Docker (``/.dockerenv`` exists), or local
    (anything else). Each environment contributes its own directory list, and
    ``/tmp`` and ``.`` are always appended.

    Returns:
        A ``(directories, default_directory)`` tuple. ``directories`` is an
        order-preserving, duplicate-free list of path strings and
        ``default_directory`` is its first entry.
    """
    import pathlib

    is_modal = os.environ.get("MODAL_ENVIRONMENT") == "1" or os.path.exists("/modal")
    is_docker = os.path.exists("/.dockerenv")
    current_dir = pathlib.Path.cwd()

    if is_modal:
        # Modal environment - use cache directories
        candidates = [
            "/root/cache/apple_podcasts",
            "/root/cache/xyz_podcasts",
            "/tmp/downloads",
        ]
    elif is_docker:
        # Docker environment
        candidates = [
            "/app/downloads",
            "/data/downloads",
            "/tmp/downloads",
        ]
    else:
        # Local environment - current directory and common user locations.
        # "~" is expanded here because path functions do not resolve it
        # themselves; an unexpanded "~/Downloads" names a directory literally
        # called "~" under the working directory.
        candidates = [
            str(current_dir / "downloads"),
            str(current_dir / "cache" / "apple_podcasts"),
            str(current_dir / "cache" / "xyz_podcasts"),
            os.path.expanduser("~/Downloads"),
            os.path.expanduser("~/Music"),
        ]

    # Locations offered in every environment
    candidates += ["/tmp", "."]

    # dict.fromkeys drops duplicates while keeping first-seen order.
    unique_dirs = list(dict.fromkeys(candidates))

    # The list always contains at least "/tmp" and ".", so index 0 is safe.
    return unique_dirs, unique_dirs[0]
102
+
103
  def create_gradio_interface():
104
  """Create Gradio interface
105
 
 
195
  info="Identify different speakers (requires Hugging Face Token)"
196
  )
197
 
198
+ # HF Token input for speaker diarization
199
+ hf_token_input_download = gr.Textbox(
200
+ label="Hugging Face Token (Optional)",
201
+ placeholder="Enter your HF token here to override environment variable",
202
+ type="password",
203
+ info="Required for speaker diarization. If provided, will override HF_TOKEN environment variable."
204
+ )
205
+
206
  download_btn = gr.Button("πŸ“₯ Start Download", variant="primary")
207
  result_output = gr.JSON(label="Download Results")
208
 
209
+ async def download_podcast_and_transcribe(url, platform, auto_transcribe, enable_speaker, hf_token):
210
  """Call corresponding download tool based on selected platform"""
211
+ # Temporarily set HF_TOKEN if provided
212
+ original_token = temporarily_set_hf_token(hf_token)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
213
 
 
214
  try:
215
+ if platform == "Apple Podcast":
216
+ download_result = await mcp_tools.download_apple_podcast(url)
217
+ else:
218
+ download_result = await mcp_tools.download_xyz_podcast(url)
 
 
 
 
 
219
 
220
+ # 2. Check if download was successful
221
+ if download_result["status"] != "success":
222
+ return {
223
+ "download_status": "failed",
224
+ "error_message": download_result.get("error_message", "Download failed"),
225
+ "transcription_status": "not_started"
 
 
 
 
 
 
 
226
  }
 
227
 
228
+ # 3. If not auto-transcribing, return only download results
229
+ if not auto_transcribe:
230
+ return {
231
+ "download_status": "success",
232
+ "audio_file": download_result["audio_file_path"],
233
+ "transcription_status": "skipped (user chose not to auto-transcribe)"
234
  }
235
 
236
+ # 4. Start transcription
237
+ try:
238
+ audio_path = download_result["audio_file_path"]
239
+ print(f"Transcribing audio file: {audio_path}")
240
+ transcribe_result = await mcp_tools.transcribe_audio_file(
241
+ audio_path,
242
+ model_size="turbo",
243
+ language=None,
244
+ output_format="srt",
245
+ enable_speaker_diarization=enable_speaker
246
+ )
247
+
248
+ # 5. Merge results
249
+ result = {
250
+ "download_status": "success",
251
+ "audio_file": audio_path,
252
+ "transcription_status": "success",
253
+ "txt_file_path": transcribe_result.get("txt_file_path"),
254
+ "srt_file_path": transcribe_result.get("srt_file_path"),
255
+ "transcription_details": {
256
+ "model_used": transcribe_result.get("model_used"),
257
+ "segment_count": transcribe_result.get("segment_count"),
258
+ "audio_duration": transcribe_result.get("audio_duration"),
259
+ "saved_files": transcribe_result.get("saved_files", []),
260
+ "speaker_diarization_enabled": transcribe_result.get("speaker_diarization_enabled", False)
261
+ }
262
+ }
263
+
264
+ # 6. Add speaker diarization info if enabled
265
+ if enable_speaker and transcribe_result.get("speaker_diarization_enabled", False):
266
+ result["speaker_diarization"] = {
267
+ "global_speaker_count": transcribe_result.get("global_speaker_count", 0),
268
+ "speaker_summary": transcribe_result.get("speaker_summary", {})
269
+ }
270
+
271
+ return result
272
+
273
+ except Exception as e:
274
+ return {
275
+ "download_status": "success",
276
+ "audio_file": download_result["audio_file_path"],
277
+ "transcription_status": "failed",
278
+ "error_message": str(e)
279
+ }
280
+
281
+ finally:
282
+ # Restore original HF_TOKEN
283
+ restore_hf_token(original_token)
284
 
285
  # Bind callback function
286
  download_btn.click(
287
  download_podcast_and_transcribe,
288
+ inputs=[url_input, platform_choice, auto_transcribe, enable_speaker_diarization, hf_token_input_download],
289
  outputs=result_output
290
  )
291
 
 
319
  )
320
 
321
  with gr.Row():
322
+ with gr.Column():
323
+ output_format_choice = gr.Radio(
324
+ choices=["srt", "txt", "json"],
325
+ value="srt",
326
+ label="Output Format"
327
+ )
328
+ with gr.Column():
329
+ enable_speaker_separation = gr.Checkbox(
330
+ label="Enable speaker diarization",
331
+ value=False,
332
+ info="Requires Hugging Face Token"
333
+ )
334
+ # HF Token input for speaker diarization
335
+ hf_token_input_transcribe = gr.Textbox(
336
+ label="Hugging Face Token (Optional)",
337
+ placeholder="Enter your HF token here to override environment variable",
338
+ type="password",
339
+ info="Required for speaker diarization. If provided, will override HF_TOKEN environment variable."
340
+ )
341
 
342
  transcribe_btn = gr.Button("🎀 Start Transcription", variant="primary", size="lg")
343
 
 
365
  visible=False
366
  )
367
 
368
+ def perform_transcription(audio_path, model_size, language, output_format, enable_speaker, hf_token):
369
  """Execute audio transcription"""
370
  if not audio_path.strip():
371
  return {
372
  "error": "Please enter audio file path"
373
  }, "Transcription failed: No audio file selected", gr.update(visible=False)
374
 
375
+ # Temporarily set HF_TOKEN if provided
376
+ original_token = temporarily_set_hf_token(hf_token)
 
 
 
 
 
377
 
378
  try:
379
+ # Check if file exists
380
+ import asyncio
381
+ file_info = asyncio.run(get_file_info_tool(audio_path))
382
+ if file_info["status"] != "success":
383
+ return {
384
+ "error": f"File does not exist or cannot be accessed: {file_info.get('error_message', 'Unknown error')}"
385
+ }, "Transcription failed: File inaccessible", gr.update(visible=False)
386
 
387
+ try:
388
+ # Process language parameter
389
+ lang = None if language == "auto" else language
390
+
391
+ # Call transcription tool
392
+ result = asyncio.run(transcribe_audio_file_tool(
393
+ audio_file_path=audio_path,
394
+ model_size=model_size,
395
+ language=lang,
396
+ output_format=output_format,
397
+ enable_speaker_diarization=enable_speaker
398
+ ))
399
+
400
+ # Prepare status information
401
+ if result.get("processing_status") == "success":
402
+ status_text = f"""βœ… Transcription completed!
403
  πŸ“ Generated files: {len(result.get('saved_files', []))} files
404
  🎡 Audio duration: {result.get('audio_duration', 0):.2f} seconds
405
  πŸ“ Transcription segments: {result.get('segment_count', 0)} segments
406
  🎯 Model used: {result.get('model_used', 'N/A')}
407
  🎭 Speaker diarization: {'Enabled' if result.get('speaker_diarization_enabled', False) else 'Disabled'}"""
408
+
409
+ # Show speaker information
410
+ speaker_visible = result.get('speaker_diarization_enabled', False) and result.get('global_speaker_count', 0) > 0
411
+ speaker_info = result.get('speaker_summary', {}) if speaker_visible else {}
412
+
413
+ return result, status_text, gr.update(visible=speaker_visible, value=speaker_info)
414
+ else:
415
+ error_msg = result.get('error_message', 'Unknown error')
416
+ return result, f"❌ Transcription failed: {error_msg}", gr.update(visible=False)
417
+
418
+ except Exception as e:
419
+ return {
420
+ "error": f"Exception occurred during transcription: {str(e)}"
421
+ }, f"❌ Transcription exception: {str(e)}", gr.update(visible=False)
422
+
423
+ finally:
424
+ # Restore original HF_TOKEN
425
+ restore_hf_token(original_token)
426
 
427
  # Bind transcription button
428
  transcribe_btn.click(
 
432
  model_size_choice,
433
  language_choice,
434
  output_format_choice,
435
+ enable_speaker_separation,
436
+ hf_token_input_transcribe
437
  ],
438
  outputs=[
439
  transcribe_result_output,
 
446
  with gr.Tab("MP3 File Management"):
447
  gr.Markdown("### 🎡 MP3 File Management")
448
 
449
+ # Get environment-specific directories
450
+ available_dirs, default_dir = get_default_directories()
451
+
452
+ # Display environment info
453
+ import pathlib
454
+ is_modal = os.environ.get("MODAL_ENVIRONMENT") == "1" or os.path.exists("/modal")
455
+ is_docker = os.path.exists("/.dockerenv")
456
+ current_dir = pathlib.Path.cwd()
457
+
458
+ if is_modal:
459
+ env_info = "πŸš€ **Modal Environment Detected** - Using Modal cache directories"
460
+ elif is_docker:
461
+ env_info = "🐳 **Docker Environment Detected** - Using container directories"
462
+ else:
463
+ env_info = f"πŸ’» **Local Environment Detected** - Using current directory: `{current_dir}`"
464
+
465
+ gr.Markdown(env_info)
466
+
467
+ with gr.Row():
468
+ with gr.Column(scale=3):
469
+ # Flexible directory path input
470
+ custom_dir_input = gr.Textbox(
471
+ label="Custom Directory Path",
472
+ placeholder="Enter custom directory path (e.g., /path/to/your/audio/files)",
473
+ lines=1,
474
+ value=default_dir
475
+ )
476
+ with gr.Column(scale=2):
477
+ # Quick select for environment-specific directories
478
+ quick_select = gr.Dropdown(
479
+ label="Quick Select",
480
+ choices=available_dirs,
481
+ value=default_dir,
482
+ info="Select directories based on current environment"
483
+ )
484
+ with gr.Column(scale=1):
485
+ scan_btn = gr.Button("πŸ” Scan Directory", variant="primary")
486
 
487
  file_list = gr.Textbox(
488
  label="MP3 File List",
 
495
 
496
  def list_mp3_files(directory):
497
  """List MP3 files in directory"""
498
+ if not directory or not directory.strip():
499
+ return "Please enter a directory path"
500
+
501
+ try:
502
+ result = asyncio.run(get_mp3_files_tool(directory.strip()))
503
+
504
+ # Check if there's an error
505
+ if "error_message" in result:
506
+ return f"❌ Error scanning directory: {result['error_message']}"
507
+
508
+ # Get file list
509
+ total_files = result.get('total_files', 0)
510
+ file_list = result.get('file_list', [])
511
+ scanned_directory = result.get('scanned_directory', directory)
512
+
513
+ if total_files == 0:
514
+ return f"πŸ“‚ No MP3 files found in: {scanned_directory}"
515
+
516
+ # Format file list for display
517
+ display_lines = [
518
+ f"πŸ“‚ Found {total_files} MP3 file{'s' if total_files != 1 else ''} in: {scanned_directory}",
519
+ "=" * 60
520
+ ]
521
+
522
+ for i, file_info in enumerate(file_list, 1):
523
+ filename = file_info.get('filename', 'Unknown')
524
+ size_mb = file_info.get('file_size_mb', 0)
525
+ created_time = file_info.get('created_time', 'Unknown')
526
+ full_path = file_info.get('full_path', 'Unknown')
527
+
528
+ display_lines.append(
529
+ f"{i:2d}. πŸ“„ {filename}\n"
530
+ f" πŸ’Ύ Size: {size_mb:.2f} MB\n"
531
+ f" πŸ“… Created: {created_time}\n"
532
+ f" πŸ“ Path: {full_path}"
533
+ )
534
+
535
+ return "\n".join(display_lines)
536
+
537
+ except Exception as e:
538
+ return f"❌ Exception occurred while scanning directory: {str(e)}"
539
 
540
+ def use_quick_select(selected_path):
541
+ """Use quick select path and auto-scan"""
542
+ if selected_path:
543
+ return selected_path, list_mp3_files(selected_path)
544
+ return "", ""
545
+
546
+ def scan_directory(custom_path, quick_path):
547
+ """Scan the directory based on custom input or quick select"""
548
+ directory = custom_path.strip() if custom_path.strip() else quick_path
549
+ return list_mp3_files(directory)
550
+
551
+ # Bind callback functions
552
+ quick_select.change(
553
+ use_quick_select,
554
+ inputs=[quick_select],
555
+ outputs=[custom_dir_input, file_list]
556
+ )
557
+
558
+ scan_btn.click(
559
+ scan_directory,
560
+ inputs=[custom_dir_input, quick_select],
561
+ outputs=[file_list]
562
+ )
563
+
564
+ # Auto-scan when custom directory is entered
565
+ custom_dir_input.change(
566
+ lambda x: list_mp3_files(x) if x.strip() else "",
567
+ inputs=[custom_dir_input],
568
  outputs=[file_list]
569
  )
570
 
571
  # ==================== Transcription Text Management Tab ====================
572
  with gr.Tab("Transcription Text Management"):
573
  gr.Markdown("### πŸ“ Transcription Text File Management")
574
+ gr.Markdown("View TXT and SRT files generated from audio transcription")
575
 
576
+ # File path input
577
+ file_path_input = gr.Textbox(
578
+ label="File Path",
579
+ placeholder="Enter path to TXT or SRT file to read",
580
+ lines=1
581
+ )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
582
 
583
+ # Load button
584
+ load_file_btn = gr.Button("πŸ“‚ Load File", variant="primary")
585
+
586
+ # Text content viewer
587
  content_editor = gr.Textbox(
588
  label="File Content",
589
  placeholder="File content will be displayed here after loading...",
590
+ lines=25,
591
+ max_lines=40,
592
  show_copy_button=True,
593
+ interactive=False
594
  )
595
 
596
  # Status information
597
  status_output = gr.Textbox(
598
+ label="Status",
599
  interactive=False,
600
  lines=2
601
  )
602
 
603
+ def load_and_display_file(file_path):
604
+ """Load and display complete file content"""
 
 
 
 
 
 
 
 
 
605
  if not file_path.strip():
606
+ return "Please enter a file path", "❌ No file path provided"
607
 
608
+ try:
609
+ # Get file info first
610
+ info = asyncio.run(get_file_info_tool(file_path))
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
611
 
612
+ if info["status"] != "success":
613
+ return "", f"❌ Error: {info.get('error_message', 'Unknown error')}"
 
 
 
 
614
 
615
+ # Check file size (warn for very large files)
616
+ file_size_mb = info.get('file_size_mb', 0)
617
+ if file_size_mb > 10: # Warn for files larger than 10MB
618
+ return "", f"⚠️ File is too large ({file_size_mb:.2f} MB). Please use a smaller file for viewing."
619
+
620
+ # Read entire file content
621
+ with open(file_path, 'r', encoding='utf-8') as f:
622
+ content = f.read()
623
+
624
+ # Status message
625
+ status = f"βœ… File loaded successfully: {info.get('filename', 'Unknown')}\nπŸ“ Size: {file_size_mb:.2f} MB"
626
+
627
+ return content, status
628
+
629
+ except UnicodeDecodeError:
630
+ return "", "❌ Error: File contains non-text content or encoding is not UTF-8"
631
+ except FileNotFoundError:
632
+ return "", "❌ Error: File not found"
633
+ except PermissionError:
634
+ return "", "❌ Error: Permission denied to read file"
635
+ except Exception as e:
636
+ return "", f"❌ Error: {str(e)}"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
637
 
638
+ # Bind event handler
639
  load_file_btn.click(
640
+ load_and_display_file,
641
  inputs=[file_path_input],
642
+ outputs=[content_editor, status_output]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
643
  )
644
 
645
  return demo