"""
Gradio interface module
Contains all UI components and interface logic
"""
import asyncio
import os
from typing import Optional

import gradio as gr

from ..tools import mcp_tools
from ..tools.download_tools import get_file_info_tool, get_mp3_files_tool, read_text_file_segments_tool
from ..tools.transcription_tools import transcribe_audio_file_tool


def write_text_file_content(file_path: str, content: str, mode: str = "w", position: Optional[int] = None):
    """Write text content to a file.

    Supports overwrite ("w"), append ("a"), and positional overwrite ("r+",
    which seeks to `position` before writing).
    """
    try:
        if mode == "r+" and position is not None:
            with open(file_path, mode, encoding='utf-8') as f:
                f.seek(position)
                characters_written = f.write(content)
        else:
            with open(file_path, mode, encoding='utf-8') as f:
                characters_written = f.write(content)
        return {
            "status": "success",
            "characters_written": characters_written,
            "operation_type": mode,
            # Length of the written content, not necessarily the net change in
            # file size (e.g. "r+" overwrites bytes in place).
            "size_change": len(content)
        }
    except Exception as e:
        return {
            "status": "failed",
            "error_message": str(e)
        }
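
# Example (illustrative): a successful call such as
#   write_text_file_content("transcript.txt", "hello", mode="a")
# returns {"status": "success", "characters_written": 5, "operation_type": "a", "size_change": 5}.
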
def create_gradio_interface():
"""Create Gradio interface
Returns:
gr.Blocks: Configured Gradio interface
"""
with gr.Blocks(title="MCP Tool Server") as demo:
gr.Markdown("# ๐Ÿค– Gradio + FastMCP Server")
gr.Markdown("This server provides both Gradio UI and FastMCP tools!")
# ==================== Podcast Download Tab ====================
with gr.Tab("Podcast Download"):
gr.Markdown("### ๐ŸŽ™๏ธ Download Podcast Audio")
url_input = gr.Textbox(
label="Podcast Link",
placeholder="Enter podcast page URL",
lines=1
)
platform_choice = gr.Radio(
choices=["Apple Podcast", "XiaoYuZhou"],
label="Select Podcast Platform",
value="Apple Podcast"
)
# Transcription options
with gr.Row():
auto_transcribe = gr.Checkbox(
label="Auto-transcribe after download",
value=True,
info="Start transcription immediately after download"
)
enable_speaker_diarization = gr.Checkbox(
label="Enable speaker diarization",
value=False,
info="Identify different speakers (requires Hugging Face Token)"
)
download_btn = gr.Button("๐Ÿ“ฅ Start Download", variant="primary")
result_output = gr.JSON(label="Download Results")
async def download_podcast_and_transcribe(url, platform, auto_transcribe, enable_speaker):
"""Call corresponding download tool based on selected platform"""
if platform == "Apple Podcast":
download_result = await mcp_tools.download_apple_podcast(url)
else:
download_result = await mcp_tools.download_xyz_podcast(url)
# 2. Check if download was successful
if download_result["status"] != "success":
return {
"download_status": "failed",
"error_message": download_result.get("error_message", "Download failed"),
"transcription_status": "not_started"
}
# 3. If not auto-transcribing, return only download results
if not auto_transcribe:
return {
"download_status": "success",
"audio_file": download_result["audio_file_path"],
"transcription_status": "skipped (user chose not to auto-transcribe)"
}
# 4. Start transcription
try:
audio_path = download_result["audio_file_path"]
print(f"Transcribing audio file: {audio_path}")
transcribe_result = await mcp_tools.transcribe_audio_file(
audio_path,
model_size="turbo",
language=None,
output_format="srt",
enable_speaker_diarization=enable_speaker
)
# 5. Merge results
result = {
"download_status": "success",
"audio_file": audio_path,
"transcription_status": "success",
"txt_file_path": transcribe_result.get("txt_file_path"),
"srt_file_path": transcribe_result.get("srt_file_path"),
"transcription_details": {
"model_used": transcribe_result.get("model_used"),
"segment_count": transcribe_result.get("segment_count"),
"audio_duration": transcribe_result.get("audio_duration"),
"saved_files": transcribe_result.get("saved_files", []),
"speaker_diarization_enabled": transcribe_result.get("speaker_diarization_enabled", False)
}
}
# 6. Add speaker diarization info if enabled
if enable_speaker and transcribe_result.get("speaker_diarization_enabled", False):
result["speaker_diarization"] = {
"global_speaker_count": transcribe_result.get("global_speaker_count", 0),
"speaker_summary": transcribe_result.get("speaker_summary", {})
}
return result
except Exception as e:
return {
"download_status": "success",
"audio_file": download_result["audio_file_path"],
"transcription_status": "failed",
"error_message": str(e)
}
# Bind callback function
download_btn.click(
download_podcast_and_transcribe,
inputs=[url_input, platform_choice, auto_transcribe, enable_speaker_diarization],
outputs=result_output
)
# ==================== Audio Transcription Tab ====================
with gr.Tab("Audio Transcription"):
gr.Markdown("### ๐ŸŽค Audio Transcription and Speaker Diarization")
gr.Markdown("Upload audio files for high-quality transcription with speaker diarization support")
with gr.Row():
with gr.Column(scale=2):
# Audio file input
audio_file_input = gr.Textbox(
label="Audio File Path",
placeholder="Enter complete path to audio file (supports mp3, wav, m4a, etc.)",
lines=1
)
# Transcription parameter settings
with gr.Row():
model_size_choice = gr.Dropdown(
choices=["tiny", "base", "small", "medium", "large", "turbo"],
value="turbo",
label="Model Size",
info="Affects transcription accuracy and speed"
)
language_choice = gr.Dropdown(
choices=["auto", "zh", "en", "ja", "ko", "fr", "de", "es"],
value="auto",
label="Language",
info="auto=auto-detect"
)
with gr.Row():
output_format_choice = gr.Radio(
choices=["srt", "txt", "json"],
value="srt",
label="Output Format"
)
enable_speaker_separation = gr.Checkbox(
label="Enable speaker diarization",
value=False,
info="Requires Hugging Face Token"
)
transcribe_btn = gr.Button("๐ŸŽค Start Transcription", variant="primary", size="lg")
with gr.Column(scale=1):
# Audio file information
audio_info_output = gr.JSON(label="Audio File Information", visible=False)
# Transcription progress and status
transcribe_status = gr.Textbox(
label="Transcription Status",
value="Waiting to start transcription...",
interactive=False,
lines=3
)
# Transcription results display
transcribe_result_output = gr.JSON(
label="Transcription Results",
visible=True
)
# Speaker diarization results (if enabled)
speaker_info_output = gr.JSON(
label="Speaker Diarization Information",
visible=False
)
def perform_transcription(audio_path, model_size, language, output_format, enable_speaker):
"""Execute audio transcription"""
if not audio_path.strip():
return {
"error": "Please enter audio file path"
}, "Transcription failed: No audio file selected", gr.update(visible=False)
                # Check that the audio file exists and is readable
                file_info = asyncio.run(get_file_info_tool(audio_path))
if file_info["status"] != "success":
return {
"error": f"File does not exist or cannot be accessed: {file_info.get('error_message', 'Unknown error')}"
}, "Transcription failed: File inaccessible", gr.update(visible=False)
try:
# Process language parameter
lang = None if language == "auto" else language
# Call transcription tool
result = asyncio.run(transcribe_audio_file_tool(
audio_file_path=audio_path,
model_size=model_size,
language=lang,
output_format=output_format,
enable_speaker_diarization=enable_speaker
))
# Prepare status information
if result.get("processing_status") == "success":
status_text = f"""โœ… Transcription completed!
๐Ÿ“ Generated files: {len(result.get('saved_files', []))} files
๐ŸŽต Audio duration: {result.get('audio_duration', 0):.2f} seconds
๐Ÿ“ Transcription segments: {result.get('segment_count', 0)} segments
๐ŸŽฏ Model used: {result.get('model_used', 'N/A')}
๐ŸŽญ Speaker diarization: {'Enabled' if result.get('speaker_diarization_enabled', False) else 'Disabled'}"""
# Show speaker information
speaker_visible = result.get('speaker_diarization_enabled', False) and result.get('global_speaker_count', 0) > 0
speaker_info = result.get('speaker_summary', {}) if speaker_visible else {}
return result, status_text, gr.update(visible=speaker_visible, value=speaker_info)
else:
error_msg = result.get('error_message', 'Unknown error')
return result, f"โŒ Transcription failed: {error_msg}", gr.update(visible=False)
except Exception as e:
return {
"error": f"Exception occurred during transcription: {str(e)}"
}, f"โŒ Transcription exception: {str(e)}", gr.update(visible=False)
# Bind transcription button
transcribe_btn.click(
perform_transcription,
inputs=[
audio_file_input,
model_size_choice,
language_choice,
output_format_choice,
enable_speaker_separation
],
outputs=[
transcribe_result_output,
transcribe_status,
speaker_info_output
]
)
# ==================== MP3 File Management Tab ====================
with gr.Tab("MP3 File Management"):
gr.Markdown("### ๐ŸŽต MP3 File Management")
dir_input = gr.Dropdown(
label="Directory Path",
choices=[
"/root/cache/apple_podcasts",
"/root/cache/xyz_podcasts"
],
value="/root/cache/apple_podcasts"
)
file_list = gr.Textbox(
label="MP3 File List",
interactive=False,
lines=10,
max_lines=20,
show_copy_button=True,
autoscroll=True
)
def list_mp3_files(directory):
"""List MP3 files in directory"""
files = asyncio.run(get_mp3_files_tool(directory))
return "\n".join(files) if files else "No MP3 files found in directory"
# Bind callback function
dir_input.change(
list_mp3_files,
inputs=[dir_input],
outputs=[file_list]
)
# ==================== Transcription Text Management Tab ====================
with gr.Tab("Transcription Text Management"):
gr.Markdown("### ๐Ÿ“ Transcription Text File Management")
gr.Markdown("Manage and edit TXT and SRT files generated from audio transcription")
with gr.Row():
with gr.Column(scale=2):
# File path input
file_path_input = gr.Textbox(
label="File Path",
placeholder="Enter path to TXT or SRT file to read",
lines=1
)
# File information display
file_info_output = gr.JSON(label="File Information", visible=False)
with gr.Row():
load_file_btn = gr.Button("๐Ÿ“‚ Load File", variant="secondary")
save_file_btn = gr.Button("๐Ÿ’พ Save File", variant="primary")
refresh_btn = gr.Button("๐Ÿ”„ Refresh", variant="secondary")
with gr.Column(scale=1):
# Read control
gr.Markdown("#### ๐Ÿ“– Segmented Reading Control")
current_position = gr.Number(
label="Current Position (bytes)",
value=0,
minimum=0
)
chunk_size = gr.Number(
label="Chunk Size (bytes)",
value=65536, # 64KB
minimum=1024,
maximum=1048576 # Max 1MB
)
with gr.Row():
prev_chunk_btn = gr.Button("โฌ…๏ธ Previous", size="sm")
next_chunk_btn = gr.Button("โžก๏ธ Next", size="sm")
# Progress display
progress_display = gr.Textbox(
label="Reading Progress",
value="No file loaded",
interactive=False,
lines=3
)
# Write control
gr.Markdown("#### โœ๏ธ Write Control")
write_mode = gr.Radio(
choices=["w", "a", "r+"],
value="w",
label="Write Mode",
info="w=overwrite, a=append, r+=position"
)
write_position = gr.Number(
label="Write Position (bytes)",
value=0,
minimum=0,
visible=False
)
# Text content editor
content_editor = gr.Textbox(
label="File Content",
placeholder="File content will be displayed here after loading...",
lines=20,
max_lines=30,
show_copy_button=True,
autoscroll=False
)
# Status information
status_output = gr.Textbox(
label="Operation Status",
interactive=False,
lines=2
)
# Internal state variables
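            # file_state tracks the browsing session: the path being read, its total
            # size, the byte offset reached by the last read, the chunk size, and the
            # text of the most recently loaded chunk.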
file_state = gr.State({
"file_path": "",
"file_size": 0,
"current_pos": 0,
"chunk_size": 65536,
"content": ""
})
            def load_file_info(file_path):
                """Load file information and toggle the info panel's visibility"""
                if not file_path.strip():
                    return gr.update(value={}, visible=False), "Please enter file path", "No file selected"
                info = asyncio.run(get_file_info_tool(file_path))
                if info["status"] == "success":
                    return (
                        gr.update(value=info, visible=True),
                        f"File: {info['filename']} | Size: {info['file_size_mb']} MB",
                        "File information loaded successfully"
                    )
                else:
                    return (
                        gr.update(value={}, visible=False),
                        f"Error: {info.get('error_message', 'Unknown error')}",
                        "Failed to load file information"
                    )
def read_file_content(file_path, position, chunk_size):
"""Read file content"""
if not file_path.strip():
return "", 0, "No file selected", {
"file_path": "",
"file_size": 0,
"current_pos": 0,
"chunk_size": chunk_size,
"content": ""
}
result = asyncio.run(read_text_file_segments_tool(file_path, int(chunk_size), int(position)))
if result["status"] == "success":
new_state = {
"file_path": file_path,
"file_size": result["file_size"],
"current_pos": result["current_position"],
"chunk_size": chunk_size,
"content": result["content"]
}
progress_text = (
f"Progress: {result['progress_percentage']:.1f}% "
f"({result['current_position']}/{result['file_size']} bytes)\n"
f"Boundary type: {result.get('actual_boundary', 'Unknown')}\n"
f"{'End of file reached' if result['end_of_file_reached'] else 'More content available'}"
)
return (
result["content"],
result["current_position"],
progress_text,
new_state
)
else:
return (
"",
position,
f"Read failed: {result.get('error_message', 'Unknown error')}",
{
"file_path": file_path,
"file_size": 0,
"current_pos": position,
"chunk_size": chunk_size,
"content": ""
}
)
def save_file_content(file_path, content, mode, position):
"""Save file content"""
if not file_path.strip():
return "Please select a file first"
if not content.strip():
return "No content to save"
# Determine whether to use position parameter based on mode
write_pos = position if mode == "r+" else None
result = write_text_file_content(file_path, content, mode, write_pos)
if result["status"] == "success":
operation_info = f"Operation: {result.get('operation_type', mode)}"
size_info = f"Size change: {result.get('size_change', 0):+d} bytes"
return f"Save successful!\n{operation_info}\nWrote {result['characters_written']} characters\n{size_info}"
else:
return f"Save failed: {result.get('error_message', 'Unknown error')}"
def navigate_chunks(file_state, direction):
"""Navigate to previous or next chunk"""
if not file_state["file_path"]:
return file_state["current_pos"], "Please load a file first"
chunk_size = file_state["chunk_size"]
current_pos = file_state["current_pos"]
if direction == "prev":
new_pos = max(0, current_pos - chunk_size * 2) # Go back two chunks
elif direction == "next":
new_pos = current_pos # Next chunk starts from current position
else:
return current_pos, "Invalid navigation direction"
return new_pos, f"Navigated to position: {new_pos}"
# Bind event handlers
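            # Loading and chunk navigation are two-step chains: the first callback
            # updates metadata or the read position, then read_file_content pulls the
            # corresponding chunk into the editor.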
load_file_btn.click(
load_file_info,
inputs=[file_path_input],
                outputs=[file_info_output, progress_display, status_output]
).then(
read_file_content,
inputs=[file_path_input, current_position, chunk_size],
outputs=[content_editor, current_position, progress_display, file_state]
)
refresh_btn.click(
read_file_content,
inputs=[file_path_input, current_position, chunk_size],
outputs=[content_editor, current_position, progress_display, file_state]
)
# Control position input visibility when write mode changes
write_mode.change(
lambda mode: gr.update(visible=(mode == "r+")),
inputs=[write_mode],
outputs=[write_position]
)
save_file_btn.click(
save_file_content,
inputs=[file_path_input, content_editor, write_mode, write_position],
outputs=[status_output]
)
prev_chunk_btn.click(
lambda state: navigate_chunks(state, "prev"),
inputs=[file_state],
outputs=[current_position, status_output]
).then(
read_file_content,
inputs=[file_path_input, current_position, chunk_size],
outputs=[content_editor, current_position, progress_display, file_state]
)
next_chunk_btn.click(
lambda state: navigate_chunks(state, "next"),
inputs=[file_state],
outputs=[current_position, status_output]
).then(
read_file_content,
inputs=[file_path_input, current_position, chunk_size],
outputs=[content_editor, current_position, progress_display, file_state]
)
return demo
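

# Minimal local-launch sketch (assumption: the real entry point lives elsewhere,
# e.g. a server module that also registers the FastMCP tools). Because this module
# uses relative imports, run it as a package module, e.g. `python -m src.ui.gradio_ui`.
if __name__ == "__main__":
    create_gradio_interface().launch()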