"""
Gradio interface module
Contains all UI components and interface logic
"""

import asyncio
import os
from typing import Optional

import gradio as gr

from ..tools import mcp_tools
from ..tools.download_tools import get_file_info_tool, get_mp3_files_tool, read_text_file_segments_tool
from ..tools.transcription_tools import transcribe_audio_file_tool


def _as_int(value, default: int = 0) -> int:
    """Coerce a ``gr.Number`` value to ``int``.

    ``gr.Number`` hands callbacks a float (or ``None`` when the field is
    cleared) unless a precision is configured, while file offsets and chunk
    sizes must be integers.
    """
    if value is None:
        return default
    return int(value)


def write_text_file_content(file_path: str, content: str, mode: str = "w", position: Optional[int] = None):
    """Write text to a file and report the outcome as a status dict.

    Args:
        file_path: Path of the file to write.
        content: Text to write (encoded as UTF-8).
        mode: ``open()`` mode; the UI offers "w" (overwrite), "a" (append)
            and "r+" (write at a position).
        position: Offset to seek to before writing. Only honoured when
            ``mode == "r+"``; floats (as produced by ``gr.Number``) are
            truncated to ``int``.

    Returns:
        dict: ``{"status": "success", "characters_written", "operation_type",
        "size_change"}`` on success, or ``{"status": "failed",
        "error_message"}`` if any exception was raised.
    """
    try:
        with open(file_path, mode, encoding='utf-8') as f:
            if mode == "r+" and position is not None:
                # Text-mode seek() rejects floats, so coerce explicitly.
                # NOTE(review): text-mode seek offsets are only well defined
                # for 0 or values returned by tell(); an offset that lands
                # inside a multi-byte UTF-8 sequence may corrupt the file.
                f.seek(int(position))
            characters_written = f.write(content)

        return {
            "status": "success",
            "characters_written": characters_written,
            "operation_type": mode,
            # NOTE(review): this is the character count of the written text,
            # not an on-disk byte delta, although the UI labels it "bytes".
            "size_change": len(content)
        }
    except Exception as e:
        # UI boundary: surface every failure as a status dict, never raise.
        return {
            "status": "failed",
            "error_message": str(e)
        }


def create_gradio_interface() -> gr.Blocks:
    """Create Gradio interface

    Builds four tabs: podcast download (with optional auto-transcription),
    audio transcription, MP3 file listing, and transcription text file
    reading/editing.

    Returns:
        gr.Blocks: Configured Gradio interface
    """

    with gr.Blocks(title="MCP Tool Server") as demo:
        gr.Markdown("# 🤖 Gradio + FastMCP Server")
        gr.Markdown("This server provides both Gradio UI and FastMCP tools!")

        # ==================== Podcast Download Tab ====================
        with gr.Tab("Podcast Download"):
            gr.Markdown("### 🎙️ Download Podcast Audio")

            url_input = gr.Textbox(
                label="Podcast Link",
                placeholder="Enter podcast page URL",
                lines=1
            )

            platform_choice = gr.Radio(
                choices=["Apple Podcast", "XiaoYuZhou"],
                label="Select Podcast Platform",
                value="Apple Podcast"
            )

            # Transcription options
            with gr.Row():
                auto_transcribe = gr.Checkbox(
                    label="Auto-transcribe after download",
                    value=True,
                    info="Start transcription immediately after download"
                )
                enable_speaker_diarization = gr.Checkbox(
                    label="Enable speaker diarization",
                    value=False,
                    info="Identify different speakers (requires Hugging Face Token)"
                )

            download_btn = gr.Button("📥 Start Download", variant="primary")
            result_output = gr.JSON(label="Download Results")

            async def download_podcast_and_transcribe(url, platform, auto_transcribe, enable_speaker):
                """Download a podcast from the selected platform and optionally transcribe it.

                Returns a JSON-serialisable dict describing the download and
                transcription outcome for display in ``result_output``.
                """
                # 1. Call the download tool matching the selected platform
                if platform == "Apple Podcast":
                    download_result = await mcp_tools.download_apple_podcast(url)
                else:
                    download_result = await mcp_tools.download_xyz_podcast(url)

                # 2. Check if download was successful
                if download_result["status"] != "success":
                    return {
                        "download_status": "failed",
                        "error_message": download_result.get("error_message", "Download failed"),
                        "transcription_status": "not_started"
                    }

                # 3. If not auto-transcribing, return only download results
                if not auto_transcribe:
                    return {
                        "download_status": "success",
                        "audio_file": download_result["audio_file_path"],
                        "transcription_status": "skipped (user chose not to auto-transcribe)"
                    }

                # 4. Start transcription
                try:
                    audio_path = download_result["audio_file_path"]
                    print(f"Transcribing audio file: {audio_path}")

                    transcribe_result = await mcp_tools.transcribe_audio_file(
                        audio_path,
                        model_size="turbo",
                        language=None,
                        output_format="srt",
                        enable_speaker_diarization=enable_speaker
                    )

                    # Do not report success when the tool explicitly reports a
                    # failure. NOTE(review): assumes the MCP wrapper returns
                    # the same "processing_status"/"error_message" keys that
                    # the Audio Transcription tab reads — confirm. Results
                    # without an explicit "failed" marker are handled as before.
                    if transcribe_result.get("processing_status") == "failed":
                        return {
                            "download_status": "success",
                            "audio_file": audio_path,
                            "transcription_status": "failed",
                            "error_message": transcribe_result.get("error_message", "Unknown error")
                        }

                    # 5. Merge results
                    result = {
                        "download_status": "success",
                        "audio_file": audio_path,
                        "transcription_status": "success",
                        "txt_file_path": transcribe_result.get("txt_file_path"),
                        "srt_file_path": transcribe_result.get("srt_file_path"),
                        "transcription_details": {
                            "model_used": transcribe_result.get("model_used"),
                            "segment_count": transcribe_result.get("segment_count"),
                            "audio_duration": transcribe_result.get("audio_duration"),
                            "saved_files": transcribe_result.get("saved_files", []),
                            "speaker_diarization_enabled": transcribe_result.get("speaker_diarization_enabled", False)
                        }
                    }

                    # 6. Add speaker diarization info if enabled
                    if enable_speaker and transcribe_result.get("speaker_diarization_enabled", False):
                        result["speaker_diarization"] = {
                            "global_speaker_count": transcribe_result.get("global_speaker_count", 0),
                            "speaker_summary": transcribe_result.get("speaker_summary", {})
                        }

                    return result

                except Exception as e:
                    # The download itself succeeded; only transcription failed.
                    return {
                        "download_status": "success",
                        "audio_file": download_result["audio_file_path"],
                        "transcription_status": "failed",
                        "error_message": str(e)
                    }

            # Bind callback function
            download_btn.click(
                download_podcast_and_transcribe,
                inputs=[url_input, platform_choice, auto_transcribe, enable_speaker_diarization],
                outputs=result_output
            )

        # ==================== Audio Transcription Tab ====================
        with gr.Tab("Audio Transcription"):
            gr.Markdown("### 🎤 Audio Transcription and Speaker Diarization")
            gr.Markdown("Upload audio files for high-quality transcription with speaker diarization support")

            with gr.Row():
                with gr.Column(scale=2):
                    # Audio file input
                    audio_file_input = gr.Textbox(
                        label="Audio File Path",
                        placeholder="Enter complete path to audio file (supports mp3, wav, m4a, etc.)",
                        lines=1
                    )

                    # Transcription parameter settings
                    with gr.Row():
                        model_size_choice = gr.Dropdown(
                            choices=["tiny", "base", "small", "medium", "large", "turbo"],
                            value="turbo",
                            label="Model Size",
                            info="Affects transcription accuracy and speed"
                        )
                        language_choice = gr.Dropdown(
                            choices=["auto", "zh", "en", "ja", "ko", "fr", "de", "es"],
                            value="auto",
                            label="Language",
                            info="auto=auto-detect"
                        )

                    with gr.Row():
                        output_format_choice = gr.Radio(
                            choices=["srt", "txt", "json"],
                            value="srt",
                            label="Output Format"
                        )
                        enable_speaker_separation = gr.Checkbox(
                            label="Enable speaker diarization",
                            value=False,
                            info="Requires Hugging Face Token"
                        )

                    transcribe_btn = gr.Button("🎤 Start Transcription", variant="primary", size="lg")

                with gr.Column(scale=1):
                    # Audio file information
                    audio_info_output = gr.JSON(label="Audio File Information", visible=False)

                    # Transcription progress and status
                    transcribe_status = gr.Textbox(
                        label="Transcription Status",
                        value="Waiting to start transcription...",
                        interactive=False,
                        lines=3
                    )

            # Transcription results display
            transcribe_result_output = gr.JSON(
                label="Transcription Results",
                visible=True
            )

            # Speaker diarization results (if enabled)
            speaker_info_output = gr.JSON(
                label="Speaker Diarization Information",
                visible=False
            )

            def perform_transcription(audio_path, model_size, language, output_format, enable_speaker):
                """Execute audio transcription

                Returns a 3-tuple matching the bound outputs: the result JSON,
                the status text, and an update for the speaker info panel.
                """
                if not audio_path.strip():
                    return {
                        "error": "Please enter audio file path"
                    }, "Transcription failed: No audio file selected", gr.update(visible=False)

                # Check if file exists
                file_info = asyncio.run(get_file_info_tool(audio_path))

                if file_info["status"] != "success":
                    return {
                        "error": f"File does not exist or cannot be accessed: {file_info.get('error_message', 'Unknown error')}"
                    }, "Transcription failed: File inaccessible", gr.update(visible=False)

                try:
                    # Process language parameter
                    lang = None if language == "auto" else language

                    # Call transcription tool
                    result = asyncio.run(transcribe_audio_file_tool(
                        audio_file_path=audio_path,
                        model_size=model_size,
                        language=lang,
                        output_format=output_format,
                        enable_speaker_diarization=enable_speaker
                    ))

                    # Prepare status information
                    if result.get("processing_status") == "success":
                        status_text = "\n".join([
                            "✅ Transcription completed!",
                            f"📁 Generated files: {len(result.get('saved_files', []))} files",
                            f"🎵 Audio duration: {result.get('audio_duration', 0):.2f} seconds",
                            f"📝 Transcription segments: {result.get('segment_count', 0)} segments",
                            f"🎯 Model used: {result.get('model_used', 'N/A')}",
                            f"🎭 Speaker diarization: {'Enabled' if result.get('speaker_diarization_enabled', False) else 'Disabled'}",
                        ])

                        # Show speaker information only when diarization ran
                        # and found at least one speaker; bool() keeps the
                        # visibility flag a real boolean.
                        speaker_visible = bool(
                            result.get('speaker_diarization_enabled', False)
                            and result.get('global_speaker_count', 0) > 0
                        )
                        speaker_info = result.get('speaker_summary', {}) if speaker_visible else {}

                        return result, status_text, gr.update(visible=speaker_visible, value=speaker_info)
                    else:
                        error_msg = result.get('error_message', 'Unknown error')
                        return result, f"❌ Transcription failed: {error_msg}", gr.update(visible=False)

                except Exception as e:
                    return {
                        "error": f"Exception occurred during transcription: {str(e)}"
                    }, f"❌ Transcription exception: {str(e)}", gr.update(visible=False)

            # Bind transcription button
            transcribe_btn.click(
                perform_transcription,
                inputs=[
                    audio_file_input,
                    model_size_choice,
                    language_choice,
                    output_format_choice,
                    enable_speaker_separation
                ],
                outputs=[
                    transcribe_result_output,
                    transcribe_status,
                    speaker_info_output
                ]
            )

        # ==================== MP3 File Management Tab ====================
        with gr.Tab("MP3 File Management"):
            gr.Markdown("### 🎵 MP3 File Management")

            dir_input = gr.Dropdown(
                label="Directory Path",
                choices=[
                    "/root/cache/apple_podcasts",
                    "/root/cache/xyz_podcasts"
                ],
                value="/root/cache/apple_podcasts"
            )

            file_list = gr.Textbox(
                label="MP3 File List",
                interactive=False,
                lines=10,
                max_lines=20,
                show_copy_button=True,
                autoscroll=True
            )

            def list_mp3_files(directory):
                """List MP3 files in directory"""
                # NOTE(review): assumes get_mp3_files_tool returns an iterable
                # of strings — confirm against the tool's return type.
                files = asyncio.run(get_mp3_files_tool(directory))
                return "\n".join(files) if files else "No MP3 files found in directory"

            # Bind callback function
            dir_input.change(
                list_mp3_files,
                inputs=[dir_input],
                outputs=[file_list]
            )

        # ==================== Transcription Text Management Tab ====================
        with gr.Tab("Transcription Text Management"):
            gr.Markdown("### 📝 Transcription Text File Management")
            gr.Markdown("Manage and edit TXT and SRT files generated from audio transcription")

            with gr.Row():
                with gr.Column(scale=2):
                    # File path input
                    file_path_input = gr.Textbox(
                        label="File Path",
                        placeholder="Enter path to TXT or SRT file to read",
                        lines=1
                    )

                    # File information display
                    file_info_output = gr.JSON(label="File Information", visible=False)

                    with gr.Row():
                        load_file_btn = gr.Button("📂 Load File", variant="secondary")
                        save_file_btn = gr.Button("💾 Save File", variant="primary")
                        refresh_btn = gr.Button("🔄 Refresh", variant="secondary")

                with gr.Column(scale=1):
                    # Read control
                    gr.Markdown("#### 📖 Segmented Reading Control")
                    current_position = gr.Number(
                        label="Current Position (bytes)",
                        value=0,
                        minimum=0
                    )
                    chunk_size = gr.Number(
                        label="Chunk Size (bytes)",
                        value=65536,  # 64KB
                        minimum=1024,
                        maximum=1048576  # Max 1MB
                    )

                    with gr.Row():
                        prev_chunk_btn = gr.Button("⬅️ Previous", size="sm")
                        next_chunk_btn = gr.Button("➡️ Next", size="sm")

                    # Progress display
                    progress_display = gr.Textbox(
                        label="Reading Progress",
                        value="No file loaded",
                        interactive=False,
                        lines=3
                    )

                    # Write control
                    gr.Markdown("#### ✏️ Write Control")
                    write_mode = gr.Radio(
                        choices=["w", "a", "r+"],
                        value="w",
                        label="Write Mode",
                        info="w=overwrite, a=append, r+=position"
                    )
                    write_position = gr.Number(
                        label="Write Position (bytes)",
                        value=0,
                        minimum=0,
                        visible=False
                    )

            # Text content editor
            content_editor = gr.Textbox(
                label="File Content",
                placeholder="File content will be displayed here after loading...",
                lines=20,
                max_lines=30,
                show_copy_button=True,
                autoscroll=False
            )

            # Status information
            status_output = gr.Textbox(
                label="Operation Status",
                interactive=False,
                lines=2
            )

            # Internal state variables
            file_state = gr.State({
                "file_path": "",
                "file_size": 0,
                "current_pos": 0,
                "chunk_size": 65536,
                "content": ""
            })

            def load_file_info(file_path):
                """Load file information

                Returns a 3-tuple for (file_info_output, progress_display,
                status_output). The file info panel receives a single update
                carrying both its value and its visibility.
                """
                if not file_path.strip():
                    return (
                        gr.update(value={}, visible=False),
                        "Please enter file path",
                        "No file selected"
                    )

                info = asyncio.run(get_file_info_tool(file_path))

                if info["status"] == "success":
                    return (
                        gr.update(value=info, visible=True),
                        f"File: {info['filename']} | Size: {info['file_size_mb']} MB",
                        "File information loaded successfully"
                    )
                else:
                    return (
                        gr.update(value={}, visible=False),
                        f"Error: {info.get('error_message', 'Unknown error')}",
                        "Failed to load file information"
                    )

            def read_file_content(file_path, position, chunk_size):
                """Read file content

                Reads one chunk starting at ``position`` and returns a 4-tuple
                for (content_editor, current_position, progress_display,
                file_state).
                """
                # gr.Number yields floats, or None when the field is cleared.
                position = _as_int(position, 0)
                chunk_size = _as_int(chunk_size, 65536)

                if not file_path.strip():
                    return "", 0, "No file selected", {
                        "file_path": "",
                        "file_size": 0,
                        "current_pos": 0,
                        "chunk_size": chunk_size,
                        "content": ""
                    }

                result = asyncio.run(read_text_file_segments_tool(file_path, chunk_size, position))

                if result["status"] == "success":
                    new_state = {
                        "file_path": file_path,
                        "file_size": result["file_size"],
                        "current_pos": result["current_position"],
                        "chunk_size": chunk_size,
                        "content": result["content"]
                    }

                    progress_text = (
                        f"Progress: {result['progress_percentage']:.1f}% "
                        f"({result['current_position']}/{result['file_size']} bytes)\n"
                        f"Boundary type: {result.get('actual_boundary', 'Unknown')}\n"
                        f"{'End of file reached' if result['end_of_file_reached'] else 'More content available'}"
                    )

                    return (
                        result["content"],
                        result["current_position"],
                        progress_text,
                        new_state
                    )
                else:
                    return (
                        "",
                        position,
                        f"Read failed: {result.get('error_message', 'Unknown error')}",
                        {
                            "file_path": file_path,
                            "file_size": 0,
                            "current_pos": position,
                            "chunk_size": chunk_size,
                            "content": ""
                        }
                    )

            def save_file_content(file_path, content, mode, position):
                """Save file content

                Returns a human-readable status message for ``status_output``.
                """
                if not file_path.strip():
                    return "Please select a file first"

                if not content.strip():
                    return "No content to save"

                # Determine whether to use position parameter based on mode
                write_pos = _as_int(position, 0) if mode == "r+" else None

                result = write_text_file_content(file_path, content, mode, write_pos)

                if result["status"] == "success":
                    operation_info = f"Operation: {result.get('operation_type', mode)}"
                    size_info = f"Size change: {result.get('size_change', 0):+d} bytes"
                    return f"Save successful!\n{operation_info}\nWrote {result['characters_written']} characters\n{size_info}"
                else:
                    return f"Save failed: {result.get('error_message', 'Unknown error')}"

            def navigate_chunks(file_state, direction):
                """Navigate to previous or next chunk

                Returns (new_position, status_message).
                """
                if not file_state["file_path"]:
                    return file_state["current_pos"], "Please load a file first"

                chunk_size = _as_int(file_state["chunk_size"], 65536)
                current_pos = _as_int(file_state["current_pos"], 0)

                if direction == "prev":
                    # current_pos is where the last read stopped, so the start
                    # of the previous chunk lies two chunk sizes back.
                    new_pos = max(0, current_pos - chunk_size * 2)  # Go back two chunks
                elif direction == "next":
                    new_pos = current_pos  # Next chunk starts from current position
                else:
                    return current_pos, "Invalid navigation direction"

                return new_pos, f"Navigated to position: {new_pos}"

            # Bind event handlers
            load_file_btn.click(
                load_file_info,
                inputs=[file_path_input],
                outputs=[file_info_output, progress_display, status_output]
            ).then(
                read_file_content,
                inputs=[file_path_input, current_position, chunk_size],
                outputs=[content_editor, current_position, progress_display, file_state]
            )

            refresh_btn.click(
                read_file_content,
                inputs=[file_path_input, current_position, chunk_size],
                outputs=[content_editor, current_position, progress_display, file_state]
            )

            # Control position input visibility when write mode changes
            write_mode.change(
                lambda mode: gr.update(visible=(mode == "r+")),
                inputs=[write_mode],
                outputs=[write_position]
            )

            save_file_btn.click(
                save_file_content,
                inputs=[file_path_input, content_editor, write_mode, write_position],
                outputs=[status_output]
            )

            prev_chunk_btn.click(
                lambda state: navigate_chunks(state, "prev"),
                inputs=[file_state],
                outputs=[current_position, status_output]
            ).then(
                read_file_content,
                inputs=[file_path_input, current_position, chunk_size],
                outputs=[content_editor, current_position, progress_display, file_state]
            )

            next_chunk_btn.click(
                lambda state: navigate_chunks(state, "next"),
                inputs=[file_state],
                outputs=[current_position, status_output]
            ).then(
                read_file_content,
                inputs=[file_path_input, current_position, chunk_size],
                outputs=[content_editor, current_position, progress_display, file_state]
            )

    return demo