Spaces:

Agents-MCP-Hackathon
/

ModalTranscriberMCP

Running

App Files Files Community

richard-su committed on Jun 10

Commit

e98f763

verified ·

1 Parent(s): 4bbc337

Upload folder using huggingface_hub

Browse files

Files changed (2) hide show

src/ui/__pycache__/gradio_ui.cpython-310.pyc +0 -0
src/ui/gradio_ui.py +363 -342

src/ui/__pycache__/gradio_ui.cpython-310.pyc CHANGED Viewed

Binary files a/src/ui/__pycache__/gradio_ui.cpython-310.pyc and b/src/ui/__pycache__/gradio_ui.cpython-310.pyc differ

src/ui/gradio_ui.py CHANGED Viewed

@@ -5,10 +5,10 @@ Contains all UI components and interface logic
 import gradio as gr
 import asyncio
 from ..tools import mcp_tools
 from ..tools.download_tools import get_file_info_tool, get_mp3_files_tool, read_text_file_segments_tool
 from ..tools.transcription_tools import transcribe_audio_file_tool
-import os
 def write_text_file_content(file_path: str, content: str, mode: str = "w", position: int = None):
     """Simple text file writing function"""
@@ -33,6 +33,73 @@ def write_text_file_content(file_path: str, content: str, mode: str = "w", posit
             "error_message": str(e)
         }
 def create_gradio_interface():
     """Create Gradio interface
@@ -128,81 +195,97 @@ def create_gradio_interface():
                     info="Identify different speakers (requires Hugging Face Token)"
                 )
             download_btn = gr.Button("📥 Start Download", variant="primary")
             result_output = gr.JSON(label="Download Results")
-            async def download_podcast_and_transcribe(url, platform, auto_transcribe, enable_speaker):
                 """Call corresponding download tool based on selected platform"""
-                if platform == "Apple Podcast":
-                    download_result = await mcp_tools.download_apple_podcast(url)
-                else:
-                    download_result = await mcp_tools.download_xyz_podcast(url)
-                # 2. Check if download was successful
-                if download_result["status"] != "success":
-                    return {
-                        "download_status": "failed",
-                        "error_message": download_result.get("error_message", "Download failed"),
-                        "transcription_status": "not_started"
-                    }
-                # 3. If not auto-transcribing, return only download results
-                if not auto_transcribe:
-                    return {
-                        "download_status": "success",
-                        "audio_file": download_result["audio_file_path"],
-                        "transcription_status": "skipped (user chose not to auto-transcribe)"
-                    }
-                # 4. Start transcription
                 try:
-                    audio_path = download_result["audio_file_path"]
-                    print(f"Transcribing audio file: {audio_path}")
-                    transcribe_result = await mcp_tools.transcribe_audio_file(
-                        audio_path,
-                        model_size="turbo",
-                        language=None,
-                        output_format="srt",
-                        enable_speaker_diarization=enable_speaker
-                    )
-                    # 5. Merge results
-                    result = {
-                        "download_status": "success",
-                        "audio_file": audio_path,
-                        "transcription_status": "success",
-                        "txt_file_path": transcribe_result.get("txt_file_path"),
-                        "srt_file_path": transcribe_result.get("srt_file_path"),
-                        "transcription_details": {
-                            "model_used": transcribe_result.get("model_used"),
-                            "segment_count": transcribe_result.get("segment_count"),
-                            "audio_duration": transcribe_result.get("audio_duration"),
-                            "saved_files": transcribe_result.get("saved_files", []),
-                            "speaker_diarization_enabled": transcribe_result.get("speaker_diarization_enabled", False)
                         }
-                    }
-                    # 6. Add speaker diarization info if enabled
-                    if enable_speaker and transcribe_result.get("speaker_diarization_enabled", False):
-                        result["speaker_diarization"] = {
-                            "global_speaker_count": transcribe_result.get("global_speaker_count", 0),
-                            "speaker_summary": transcribe_result.get("speaker_summary", {})
                         }
-                    return result
-                except Exception as e:
-                    return {
-                        "download_status": "success",
-                        "audio_file": download_result["audio_file_path"],
-                        "transcription_status": "failed",
-                        "error_message": str(e)
-                    }
             # Bind callback function
             download_btn.click(
                 download_podcast_and_transcribe,
-                inputs=[url_input, platform_choice, auto_transcribe, enable_speaker_diarization],
                 outputs=result_output
             )
@@ -236,16 +319,25 @@ def create_gradio_interface():
                         )
                     with gr.Row():
-                        output_format_choice = gr.Radio(
-                            choices=["srt", "txt", "json"],
-                            value="srt",
-                            label="Output Format"
-                        )
-                        enable_speaker_separation = gr.Checkbox(
-                            label="Enable speaker diarization",
-                            value=False,
-                            info="Requires Hugging Face Token"
-                        )
                     transcribe_btn = gr.Button("🎤 Start Transcription", variant="primary", size="lg")
@@ -273,56 +365,64 @@ def create_gradio_interface():
                 visible=False
             )
-            def perform_transcription(audio_path, model_size, language, output_format, enable_speaker):
                 """Execute audio transcription"""
                 if not audio_path.strip():
                     return {
                         "error": "Please enter audio file path"
                     }, "Transcription failed: No audio file selected", gr.update(visible=False)
-                # Check if file exists
-                import asyncio
-                file_info = asyncio.run(get_file_info_tool(audio_path))
-                if file_info["status"] != "success":
-                    return {
-                        "error": f"File does not exist or cannot be accessed: {file_info.get('error_message', 'Unknown error')}"
-                    }, "Transcription failed: File inaccessible", gr.update(visible=False)
                 try:
-                    # Process language parameter
-                    lang = None if language == "auto" else language
-                    # Call transcription tool
-                    result = asyncio.run(transcribe_audio_file_tool(
-                        audio_file_path=audio_path,
-                        model_size=model_size,
-                        language=lang,
-                        output_format=output_format,
-                        enable_speaker_diarization=enable_speaker
-                    ))
-                    # Prepare status information
-                    if result.get("processing_status") == "success":
-                        status_text = f"""✅ Transcription completed!
 📁 Generated files: {len(result.get('saved_files', []))} files
 🎵 Audio duration: {result.get('audio_duration', 0):.2f} seconds
 📝 Transcription segments: {result.get('segment_count', 0)} segments
 🎯 Model used: {result.get('model_used', 'N/A')}
 🎭 Speaker diarization: {'Enabled' if result.get('speaker_diarization_enabled', False) else 'Disabled'}"""
-                        # Show speaker information
-                        speaker_visible = result.get('speaker_diarization_enabled', False) and result.get('global_speaker_count', 0) > 0
-                        speaker_info = result.get('speaker_summary', {}) if speaker_visible else {}
-                        return result, status_text, gr.update(visible=speaker_visible, value=speaker_info)
-                    else:
-                        error_msg = result.get('error_message', 'Unknown error')
-                        return result, f"❌ Transcription failed: {error_msg}", gr.update(visible=False)
-                except Exception as e:
-                    return {
-                        "error": f"Exception occurred during transcription: {str(e)}"
-                    }, f"❌ Transcription exception: {str(e)}", gr.update(visible=False)
             # Bind transcription button
             transcribe_btn.click(
@@ -332,7 +432,8 @@ def create_gradio_interface():
                     model_size_choice,
                     language_choice,
                     output_format_choice,
-                    enable_speaker_separation
                 ],
                 outputs=[
                     transcribe_result_output,
@@ -345,14 +446,43 @@ def create_gradio_interface():
         with gr.Tab("MP3 File Management"):
             gr.Markdown("### 🎵 MP3 File Management")
-            dir_input = gr.Dropdown(
-                label="Directory Path",
-                choices=[
-                    "/root/cache/apple_podcasts",
-                    "/root/cache/xyz_podcasts"
-                ],
-                value="/root/cache/apple_podcasts"
-            )
             file_list = gr.Textbox(
                 label="MP3 File List",
@@ -365,260 +495,151 @@ def create_gradio_interface():
             def list_mp3_files(directory):
                 """List MP3 files in directory"""
-                files = asyncio.run(get_mp3_files_tool(directory))
-                return "\n".join(files) if files else "No MP3 files found in directory"
-            # Bind callback function
-            dir_input.change(
-                list_mp3_files,
-                inputs=[dir_input],
                 outputs=[file_list]
             )
         # ==================== Transcription Text Management Tab ====================
         with gr.Tab("Transcription Text Management"):
             gr.Markdown("### 📝 Transcription Text File Management")
-            gr.Markdown("Manage and edit TXT and SRT files generated from audio transcription")
-            with gr.Row():
-                with gr.Column(scale=2):
-                    # File path input
-                    file_path_input = gr.Textbox(
-                        label="File Path",
-                        placeholder="Enter path to TXT or SRT file to read",
-                        lines=1
-                    )
-                    # File information display
-                    file_info_output = gr.JSON(label="File Information", visible=False)
-                    with gr.Row():
-                        load_file_btn = gr.Button("📂 Load File", variant="secondary")
-                        save_file_btn = gr.Button("💾 Save File", variant="primary")
-                        refresh_btn = gr.Button("🔄 Refresh", variant="secondary")
-                with gr.Column(scale=1):
-                    # Read control
-                    gr.Markdown("#### 📖 Segmented Reading Control")
-                    current_position = gr.Number(
-                        label="Current Position (bytes)",
-                        value=0,
-                        minimum=0
-                    )
-                    chunk_size = gr.Number(
-                        label="Chunk Size (bytes)",
-                        value=65536,  # 64KB
-                        minimum=1024,
-                        maximum=1048576  # Max 1MB
-                    )
-                    with gr.Row():
-                        prev_chunk_btn = gr.Button("⬅️ Previous", size="sm")
-                        next_chunk_btn = gr.Button("➡️ Next", size="sm")
-                    # Progress display
-                    progress_display = gr.Textbox(
-                        label="Reading Progress",
-                        value="No file loaded",
-                        interactive=False,
-                        lines=3
-                    )
-                    # Write control
-                    gr.Markdown("#### ✏️ Write Control")
-                    write_mode = gr.Radio(
-                        choices=["w", "a", "r+"],
-                        value="w",
-                        label="Write Mode",
-                        info="w=overwrite, a=append, r+=position"
-                    )
-                    write_position = gr.Number(
-                        label="Write Position (bytes)",
-                        value=0,
-                        minimum=0,
-                        visible=False
-                    )
-            # Text content editor
             content_editor = gr.Textbox(
                 label="File Content",
                 placeholder="File content will be displayed here after loading...",
-                lines=20,
-                max_lines=30,
                 show_copy_button=True,
-                autoscroll=False
             )
             # Status information
             status_output = gr.Textbox(
-                label="Operation Status",
                 interactive=False,
                 lines=2
             )
-            # Internal state variables
-            file_state = gr.State({
-                "file_path": "",
-                "file_size": 0,
-                "current_pos": 0,
-                "chunk_size": 65536,
-                "content": ""
-            })
-            def load_file_info(file_path):
-                """Load file information"""
                 if not file_path.strip():
-                    return {}, "Please enter file path", "No file selected", gr.update(visible=False)
-                info = asyncio.run(get_file_info_tool(file_path))
-                if info["status"] == "success":
-                    return (
-                        info,
-                        f"File: {info['filename']} | Size: {info['file_size_mb']} MB",
-                        "File information loaded successfully",
-                        gr.update(visible=True)
-                    )
-                else:
-                    return (
-                        {},
-                        f"Error: {info.get('error_message', 'Unknown error')}",
-                        "Failed to load file information",
-                        gr.update(visible=False)
-                    )
-            def read_file_content(file_path, position, chunk_size):
-                """Read file content"""
-                if not file_path.strip():
-                    return "", 0, "No file selected", {
-                        "file_path": "",
-                        "file_size": 0,
-                        "current_pos": 0,
-                        "chunk_size": chunk_size,
-                        "content": ""
-                    }
-                result = asyncio.run(read_text_file_segments_tool(file_path, int(chunk_size), int(position)))
-                if result["status"] == "success":
-                    new_state = {
-                        "file_path": file_path,
-                        "file_size": result["file_size"],
-                        "current_pos": result["current_position"],
-                        "chunk_size": chunk_size,
-                        "content": result["content"]
-                    }
-                    progress_text = (
-                        f"Progress: {result['progress_percentage']:.1f}% "
-                        f"({result['current_position']}/{result['file_size']} bytes)\n"
-                        f"Boundary type: {result.get('actual_boundary', 'Unknown')}\n"
-                        f"{'End of file reached' if result['end_of_file_reached'] else 'More content available'}"
-                    )
-                    return (
-                        result["content"],
-                        result["current_position"],
-                        progress_text,
-                        new_state
-                    )
-                else:
-                    return (
-                        "",
-                        position,
-                        f"Read failed: {result.get('error_message', 'Unknown error')}",
-                        {
-                            "file_path": file_path,
-                            "file_size": 0,
-                            "current_pos": position,
-                            "chunk_size": chunk_size,
-                            "content": ""
-                        }
-                    )
-            def save_file_content(file_path, content, mode, position):
-                """Save file content"""
-                if not file_path.strip():
-                    return "Please select a file first"
-                if not content.strip():
-                    return "No content to save"
-                # Determine whether to use position parameter based on mode
-                write_pos = position if mode == "r+" else None
-                result = write_text_file_content(file_path, content, mode, write_pos)
-                if result["status"] == "success":
-                    operation_info = f"Operation: {result.get('operation_type', mode)}"
-                    size_info = f"Size change: {result.get('size_change', 0):+d} bytes"
-                    return f"Save successful!\n{operation_info}\nWrote {result['characters_written']} characters\n{size_info}"
-                else:
-                    return f"Save failed: {result.get('error_message', 'Unknown error')}"
-            def navigate_chunks(file_state, direction):
-                """Navigate to previous or next chunk"""
-                if not file_state["file_path"]:
-                    return file_state["current_pos"], "Please load a file first"
-                chunk_size = file_state["chunk_size"]
-                current_pos = file_state["current_pos"]
-                if direction == "prev":
-                    new_pos = max(0, current_pos - chunk_size * 2)  # Go back two chunks
-                elif direction == "next":
-                    new_pos = current_pos  # Next chunk starts from current position
-                else:
-                    return current_pos, "Invalid navigation direction"
-                return new_pos, f"Navigated to position: {new_pos}"
-            # Bind event handlers
             load_file_btn.click(
-                load_file_info,
                 inputs=[file_path_input],
-                outputs=[file_info_output, progress_display, status_output, file_info_output]
-            ).then(
-                read_file_content,
-                inputs=[file_path_input, current_position, chunk_size],
-                outputs=[content_editor, current_position, progress_display, file_state]
-            )
-            refresh_btn.click(
-                read_file_content,
-                inputs=[file_path_input, current_position, chunk_size],
-                outputs=[content_editor, current_position, progress_display, file_state]
-            )
-            # Control position input visibility when write mode changes
-            write_mode.change(
-                lambda mode: gr.update(visible=(mode == "r+")),
-                inputs=[write_mode],
-                outputs=[write_position]
-            )
-            save_file_btn.click(
-                save_file_content,
-                inputs=[file_path_input, content_editor, write_mode, write_position],
-                outputs=[status_output]
-            )
-            prev_chunk_btn.click(
-                lambda state: navigate_chunks(state, "prev"),
-                inputs=[file_state],
-                outputs=[current_position, status_output]
-            ).then(
-                read_file_content,
-                inputs=[file_path_input, current_position, chunk_size],
-                outputs=[content_editor, current_position, progress_display, file_state]
-            )
-            next_chunk_btn.click(
-                lambda state: navigate_chunks(state, "next"),
-                inputs=[file_state],
-                outputs=[current_position, status_output]
-            ).then(
-                read_file_content,
-                inputs=[file_path_input, current_position, chunk_size],
-                outputs=[content_editor, current_position, progress_display, file_state]
             )
     return demo

 import gradio as gr
 import asyncio
+import os
 from ..tools import mcp_tools
 from ..tools.download_tools import get_file_info_tool, get_mp3_files_tool, read_text_file_segments_tool
 from ..tools.transcription_tools import transcribe_audio_file_tool
 def write_text_file_content(file_path: str, content: str, mode: str = "w", position: int = None):
     """Simple text file writing function"""
             "error_message": str(e)
         }
+def temporarily_set_hf_token(hf_token: str):
+    """Override HF_TOKEN in the process environment with a user-supplied token.
+
+    The override is written to ``os.environ`` and is therefore process-wide
+    until ``restore_hf_token`` is called with the returned value.
+
+    Args:
+        hf_token: Token entered in the UI. ``None``, empty, or
+            whitespace-only values leave the environment untouched.
+
+    Returns:
+        The previous ``HF_TOKEN`` value, or ``None`` if it was not set.
+    """
+    original_token = os.environ.get("HF_TOKEN")
+    cleaned_token = hf_token.strip() if hf_token else ""
+    if cleaned_token:
+        os.environ["HF_TOKEN"] = cleaned_token
+        # Log only that an override happened; printing a token prefix would
+        # leak part of the secret.
+        print("🔑 Using user-provided HF_TOKEN")
+    return original_token
+def restore_hf_token(original_token: str):
+    """Put HF_TOKEN back to the value captured before an override.
+
+    Args:
+        original_token: Value previously read from ``os.environ``; ``None``
+            means the variable was not set and should be removed again.
+    """
+    if original_token is None:
+        # Nothing was set beforehand, so drop the variable if it is present.
+        os.environ.pop("HF_TOKEN", None)
+        return
+    os.environ["HF_TOKEN"] = original_token
+def get_default_directories():
+    """Return candidate download directories for the detected runtime.
+
+    The runtime is treated as Modal when ``MODAL_ENVIRONMENT`` equals "1" or
+    ``/modal`` exists, as Docker when ``/.dockerenv`` exists, and as a local
+    machine otherwise. ``/tmp`` and ``.`` are always appended.
+
+    Returns:
+        A ``(directories, default_directory)`` tuple: an ordered,
+        de-duplicated list of directory paths, and its first entry.
+    """
+    import pathlib
+    # Detect environment
+    is_modal = os.environ.get("MODAL_ENVIRONMENT") == "1" or os.path.exists("/modal")
+    is_docker = os.path.exists("/.dockerenv")
+    current_dir = pathlib.Path.cwd()
+    if is_modal:
+        # Modal environment - use cache directories
+        base_dirs = [
+            "/root/cache/apple_podcasts",
+            "/root/cache/xyz_podcasts",
+            "/tmp/downloads",
+        ]
+    elif is_docker:
+        # Docker environment
+        base_dirs = [
+            "/app/downloads",
+            "/data/downloads",
+            "/tmp/downloads",
+        ]
+    else:
+        # Local environment - use current directory and common locations
+        base_dirs = [
+            str(current_dir / "downloads"),
+            str(current_dir / "cache" / "apple_podcasts"),
+            str(current_dir / "cache" / "xyz_podcasts"),
+            # Python path APIs do not expand "~"; resolve it here so the
+            # entries are usable paths.
+            os.path.expanduser("~/Downloads"),
+            os.path.expanduser("~/Music"),
+        ]
+    # Add common directories
+    base_dirs.extend(["/tmp", "."])
+    # dict.fromkeys drops duplicates while keeping first-seen order
+    unique_dirs = list(dict.fromkeys(base_dirs))
+    # "/tmp" and "." are always present, so the list is never empty
+    return unique_dirs, unique_dirs[0]
 def create_gradio_interface():
     """Create Gradio interface
                     info="Identify different speakers (requires Hugging Face Token)"
                 )
+                # HF Token input for speaker diarization
+                hf_token_input_download = gr.Textbox(
+                    label="Hugging Face Token (Optional)",
+                    placeholder="Enter your HF token here to override environment variable",
+                    type="password",
+                    info="Required for speaker diarization. If provided, will override HF_TOKEN environment variable."
+                )
             download_btn = gr.Button("📥 Start Download", variant="primary")
             result_output = gr.JSON(label="Download Results")
+            async def download_podcast_and_transcribe(url, platform, auto_transcribe, enable_speaker, hf_token):
                 """Call corresponding download tool based on selected platform"""
+                # Temporarily set HF_TOKEN if provided
+                original_token = temporarily_set_hf_token(hf_token)
                 try:
+                    if platform == "Apple Podcast":
+                        download_result = await mcp_tools.download_apple_podcast(url)
+                    else:
+                        download_result = await mcp_tools.download_xyz_podcast(url)
+                    # 2. Check if download was successful
+                    if download_result["status"] != "success":
+                        return {
+                            "download_status": "failed",
+                            "error_message": download_result.get("error_message", "Download failed"),
+                            "transcription_status": "not_started"
                         }
+                    # 3. If not auto-transcribing, return only download results
+                    if not auto_transcribe:
+                        return {
+                            "download_status": "success",
+                            "audio_file": download_result["audio_file_path"],
+                            "transcription_status": "skipped (user chose not to auto-transcribe)"
                         }
+                    # 4. Start transcription
+                    try:
+                        audio_path = download_result["audio_file_path"]
+                        print(f"Transcribing audio file: {audio_path}")
+                        transcribe_result = await mcp_tools.transcribe_audio_file(
+                            audio_path,
+                            model_size="turbo",
+                            language=None,
+                            output_format="srt",
+                            enable_speaker_diarization=enable_speaker
+                        )
+                        # 5. Merge results
+                        result = {
+                            "download_status": "success",
+                            "audio_file": audio_path,
+                            "transcription_status": "success",
+                            "txt_file_path": transcribe_result.get("txt_file_path"),
+                            "srt_file_path": transcribe_result.get("srt_file_path"),
+                            "transcription_details": {
+                                "model_used": transcribe_result.get("model_used"),
+                                "segment_count": transcribe_result.get("segment_count"),
+                                "audio_duration": transcribe_result.get("audio_duration"),
+                                "saved_files": transcribe_result.get("saved_files", []),
+                                "speaker_diarization_enabled": transcribe_result.get("speaker_diarization_enabled", False)
+                            }
+                        }
+                        # 6. Add speaker diarization info if enabled
+                        if enable_speaker and transcribe_result.get("speaker_diarization_enabled", False):
+                            result["speaker_diarization"] = {
+                                "global_speaker_count": transcribe_result.get("global_speaker_count", 0),
+                                "speaker_summary": transcribe_result.get("speaker_summary", {})
+                            }
+                        return result
+                    except Exception as e:
+                        return {
+                            "download_status": "success",
+                            "audio_file": download_result["audio_file_path"],
+                            "transcription_status": "failed",
+                            "error_message": str(e)
+                        }
+                finally:
+                    # Restore original HF_TOKEN
+                    restore_hf_token(original_token)
             # Bind callback function
             download_btn.click(
                 download_podcast_and_transcribe,
+                inputs=[url_input, platform_choice, auto_transcribe, enable_speaker_diarization, hf_token_input_download],
                 outputs=result_output
             )
                         )
                     with gr.Row():
+                        with gr.Column():
+                            output_format_choice = gr.Radio(
+                                choices=["srt", "txt", "json"],
+                                value="srt",
+                                label="Output Format"
+                            )
+                        with gr.Column():
+                            enable_speaker_separation = gr.Checkbox(
+                                label="Enable speaker diarization",
+                                value=False,
+                                info="Requires Hugging Face Token"
+                            )
+                            # HF Token input for speaker diarization
+                            hf_token_input_transcribe = gr.Textbox(
+                                label="Hugging Face Token (Optional)",
+                                placeholder="Enter your HF token here to override environment variable",
+                                type="password",
+                                info="Required for speaker diarization. If provided, will override HF_TOKEN environment variable."
+                            )
                     transcribe_btn = gr.Button("🎤 Start Transcription", variant="primary", size="lg")
                 visible=False
             )
+            def perform_transcription(audio_path, model_size, language, output_format, enable_speaker, hf_token):
                 """Execute audio transcription"""
                 if not audio_path.strip():
                     return {
                         "error": "Please enter audio file path"
                     }, "Transcription failed: No audio file selected", gr.update(visible=False)
+                # Temporarily set HF_TOKEN if provided
+                original_token = temporarily_set_hf_token(hf_token)
                 try:
+                    # Check if file exists
+                    import asyncio
+                    file_info = asyncio.run(get_file_info_tool(audio_path))
+                    if file_info["status"] != "success":
+                        return {
+                            "error": f"File does not exist or cannot be accessed: {file_info.get('error_message', 'Unknown error')}"
+                        }, "Transcription failed: File inaccessible", gr.update(visible=False)
+                    try:
+                        # Process language parameter
+                        lang = None if language == "auto" else language
+                        # Call transcription tool
+                        result = asyncio.run(transcribe_audio_file_tool(
+                            audio_file_path=audio_path,
+                            model_size=model_size,
+                            language=lang,
+                            output_format=output_format,
+                            enable_speaker_diarization=enable_speaker
+                        ))
+                        # Prepare status information
+                        if result.get("processing_status") == "success":
+                            status_text = f"""✅ Transcription completed!
 📁 Generated files: {len(result.get('saved_files', []))} files
 🎵 Audio duration: {result.get('audio_duration', 0):.2f} seconds
 📝 Transcription segments: {result.get('segment_count', 0)} segments
 🎯 Model used: {result.get('model_used', 'N/A')}
 🎭 Speaker diarization: {'Enabled' if result.get('speaker_diarization_enabled', False) else 'Disabled'}"""
+                            # Show speaker information
+                            speaker_visible = result.get('speaker_diarization_enabled', False) and result.get('global_speaker_count', 0) > 0
+                            speaker_info = result.get('speaker_summary', {}) if speaker_visible else {}
+                            return result, status_text, gr.update(visible=speaker_visible, value=speaker_info)
+                        else:
+                            error_msg = result.get('error_message', 'Unknown error')
+                            return result, f"❌ Transcription failed: {error_msg}", gr.update(visible=False)
+                    except Exception as e:
+                        return {
+                            "error": f"Exception occurred during transcription: {str(e)}"
+                        }, f"❌ Transcription exception: {str(e)}", gr.update(visible=False)
+                finally:
+                    # Restore original HF_TOKEN
+                    restore_hf_token(original_token)
             # Bind transcription button
             transcribe_btn.click(
                     model_size_choice,
                     language_choice,
                     output_format_choice,
+                    enable_speaker_separation,
+                    hf_token_input_transcribe
                 ],
                 outputs=[
                     transcribe_result_output,
         with gr.Tab("MP3 File Management"):
             gr.Markdown("### 🎵 MP3 File Management")
+            # Get environment-specific directories
+            available_dirs, default_dir = get_default_directories()
+            # Display environment info
             # NOTE(review): `os` is used below but the import hunk of this change
             # shows an `import os` line being removed - confirm it is still
             # imported at module scope.
+            import pathlib
             # Modal is assumed when MODAL_ENVIRONMENT equals "1" or a /modal path exists.
+            is_modal = os.environ.get("MODAL_ENVIRONMENT") == "1" or os.path.exists("/modal")
             # Docker is assumed when the /.dockerenv marker path exists.
+            is_docker = os.path.exists("/.dockerenv")
+            current_dir = pathlib.Path.cwd()
             # Modal takes precedence over Docker; anything else is reported as a
             # local run together with the current working directory.
+            if is_modal:
+                env_info = "🚀 **Modal Environment Detected** - Using Modal cache directories"
+            elif is_docker:
+                env_info = "🐳 **Docker Environment Detected** - Using container directories"
+            else:
+                env_info = f"💻 **Local Environment Detected** - Using current directory: `{current_dir}`"
+            gr.Markdown(env_info)
             # One row with three columns (relative widths 3:2:1): free-form path
             # box, quick-select dropdown, scan button.
+            with gr.Row():
+                with gr.Column(scale=3):
+                    # Flexible directory path input
                     # Pre-filled with default_dir from get_default_directories().
+                    custom_dir_input = gr.Textbox(
+                        label="Custom Directory Path",
+                        placeholder="Enter custom directory path (e.g., /path/to/your/audio/files)",
+                        lines=1,
+                        value=default_dir
+                    )
+                with gr.Column(scale=2):
+                    # Quick select for environment-specific directories
                     # Offers available_dirs and starts on the same default_dir as the path box.
+                    quick_select = gr.Dropdown(
+                        label="Quick Select",
+                        choices=available_dirs,
+                        value=default_dir,
+                        info="Select directories based on current environment"
+                    )
+                with gr.Column(scale=1):
+                    scan_btn = gr.Button("🔍 Scan Directory", variant="primary")
             file_list = gr.Textbox(
                 label="MP3 File List",
             def list_mp3_files(directory):
                 """Scan a directory for MP3 files and format the result for display.

                 Args:
                     directory: Path of the directory to scan. Surrounding
                         whitespace is ignored; an empty or missing value is
                         rejected with a prompt instead of being scanned.

                 Returns:
                     A display string: a prompt when no path was given, an error
                     message when the scan tool reports or raises an error, a
                     "no files" notice, or a numbered listing (name, size,
                     creation time, full path) of the reported files.
                 """
                 if not directory or not directory.strip():
                     return "Please enter a directory path"
                 try:
                     result = asyncio.run(get_mp3_files_tool(directory.strip()))
                     # The tool signals failure through an "error_message" key
                     if "error_message" in result:
                         return f"❌ Error scanning directory: {result['error_message']}"
                     # Named `mp3_entries` rather than `file_list` so the enclosing
                     # scope's `file_list` textbox component is not shadowed.
                     # `or []` keeps a None value from breaking the loop below.
                     mp3_entries = result.get('file_list') or []
                     total_files = result.get('total_files')
                     if total_files is None:
                         # Without a counter, trust the entries themselves rather
                         # than reporting an empty directory.
                         total_files = len(mp3_entries)
                     scanned_directory = result.get('scanned_directory', directory)
                     if total_files == 0:
                         return f"📂 No MP3 files found in: {scanned_directory}"
                     # Format file list for display
                     display_lines = [
                         f"📂 Found {total_files} MP3 file{'s' if total_files != 1 else ''} in: {scanned_directory}",
                         "=" * 60
                     ]
                     for index, entry in enumerate(mp3_entries, 1):
                         filename = entry.get('filename', 'Unknown')
                         # `or 0` keeps the :.2f format from raising on a None size
                         size_mb = entry.get('file_size_mb') or 0
                         created_time = entry.get('created_time', 'Unknown')
                         full_path = entry.get('full_path', 'Unknown')
                         display_lines.append(
                             f"{index:2d}. 📄 {filename}\n"
                             f"     💾 Size: {size_mb:.2f} MB\n"
                             f"     📅 Created: {created_time}\n"
                             f"     📁 Path: {full_path}"
                         )
                     return "\n".join(display_lines)
                 except Exception as e:
                     # UI boundary: surface any failure as text instead of raising
                     return f"❌ Exception occurred while scanning directory: {str(e)}"
+            def use_quick_select(selected_path):
+                """Use quick select path and auto-scan"""
+                if selected_path:
+                    return selected_path, list_mp3_files(selected_path)
+                return "", ""
+            def scan_directory(custom_path, quick_path):
+                """Scan the directory based on custom input or quick select"""
+                directory = custom_path.strip() if custom_path.strip() else quick_path
+                return list_mp3_files(directory)
+            # Bind callback functions
             # Picking a quick-select entry copies it into the custom path box and
             # shows its listing (use_quick_select returns both values).
             # NOTE(review): Gradio documents `change` as also firing on
             # programmatic updates, so writing to custom_dir_input here
             # presumably re-triggers the auto-scan bound below (a second scan
             # of the same directory) - confirm.
+            quick_select.change(
+                use_quick_select,
+                inputs=[quick_select],
+                outputs=[custom_dir_input, file_list]
+            )
             # The scan button prefers the typed path and falls back to the
             # dropdown value (see scan_directory).
+            scan_btn.click(
+                scan_directory,
+                inputs=[custom_dir_input, quick_select],
+                outputs=[file_list]
+            )
+            # Auto-scan when custom directory is entered
             # NOTE(review): `change` presumably fires on every edit of the
             # textbox, not only on Enter, so partially typed paths get scanned
             # too - confirm this is intended (a `submit` binding would scan on
             # Enter only). The lambda also assumes x is a string.
+            custom_dir_input.change(
+                lambda x: list_mp3_files(x) if x.strip() else "",
+                inputs=[custom_dir_input],
                 outputs=[file_list]
             )
         # ==================== Transcription Text Management Tab ====================
         with gr.Tab("Transcription Text Management"):
             gr.Markdown("### 📝 Transcription Text File Management")
+            gr.Markdown("View TXT and SRT files generated from audio transcription")
+            # File path input
+            file_path_input = gr.Textbox(
+                label="File Path",
+                placeholder="Enter path to TXT or SRT file to read",
+                lines=1
+            )
+            # Load button
+            load_file_btn = gr.Button("📂 Load File", variant="primary")
+            # Text content viewer
             # Read-only (interactive=False) box with a copy button; it is the
             # first output of the load button's handler.
             content_editor = gr.Textbox(
                 label="File Content",
                 placeholder="File content will be displayed here after loading...",
+                lines=25,
+                max_lines=40,
                 show_copy_button=True,
+                interactive=False
             )
             # Status information
             # Read-only two-line box; it is the second output of the load
             # button's handler.
             status_output = gr.Textbox(
+                label="Status",
                 interactive=False,
                 lines=2
             )
+            def load_and_display_file(file_path):
+                """Load and display complete file content"""
                 if not file_path.strip():
+                    return "Please enter a file path", "❌ No file path provided"
+                try:
+                    # Get file info first
+                    info = asyncio.run(get_file_info_tool(file_path))
+                    if info["status"] != "success":
+                        return "", f"❌ Error: {info.get('error_message', 'Unknown error')}"
+                    # Check file size (warn for very large files)
+                    file_size_mb = info.get('file_size_mb', 0)
+                    if file_size_mb > 10:  # Warn for files larger than 10MB
+                        return "", f"⚠️ File is too large ({file_size_mb:.2f} MB). Please use a smaller file for viewing."
+                    # Read entire file content
+                    with open(file_path, 'r', encoding='utf-8') as f:
+                        content = f.read()
+                    # Status message
+                    status = f"✅ File loaded successfully: {info.get('filename', 'Unknown')}\n📁 Size: {file_size_mb:.2f} MB"
+                    return content, status
+                except UnicodeDecodeError:
+                    return "", "❌ Error: File contains non-text content or encoding is not UTF-8"
+                except FileNotFoundError:
+                    return "", "❌ Error: File not found"
+                except PermissionError:
+                    return "", "❌ Error: Permission denied to read file"
+                except Exception as e:
+                    return "", f"❌ Error: {str(e)}"
+            # Bind event handler
             # load_and_display_file returns (content, status); the pair maps onto
             # the outputs below in that order.
             load_file_btn.click(
+                load_and_display_file,
                 inputs=[file_path_input],
+                outputs=[content_editor, status_output]
             )
     return demo