# Source: Hugging Face Space "Agents-MCP-Hackathon/doc-mcp" (status: Running).
# File size: 74,987 bytes.

import asyncio
import os
import time
import traceback
from typing import Dict, List

import gradio as gr
from dotenv import load_dotenv
from llama_index.core import Settings
from llama_index.core.text_splitter import SentenceSplitter

from rag.config import (
    delete_repository_data,
    embed_model,
    get_available_repos,
    get_repo_details,
    get_repository_stats,
    llm,
)
from rag.github_file_loader import fetch_markdown_files as fetch_files_with_loader
from rag.github_file_loader import fetch_repository_files, load_github_files
from rag.ingest import ingest_documents_async
from rag.query import QueryRetriever

# Load variables from a .env file (python-dotenv) before the os.getenv()
# lookups further down in this module.
load_dotenv()

# Install the project's LLM and embedding model (imported from rag.config) as
# the llama_index global defaults, and use a sentence splitter configured with
# chunk_size=3072 as the default node parser.
Settings.llm = llm
Settings.embed_model = embed_model
Settings.node_parser = SentenceSplitter(chunk_size=3072)

# Environment variable to control repository management visibility.
# Defaults to enabled; any value other than "true" (case-insensitive)
# disables it.
ENABLE_REPO_MANAGEMENT = os.getenv("ENABLE_REPO_MANAGEMENT", "true").lower() == "true"

def get_available_repositories():
    """Return whatever ``get_available_repos`` (from ``rag.config``) reports.

    Thin indirection so the UI code has a single local entry point for the
    repository list.
    """
    repositories = get_available_repos()
    return repositories


def start_file_loading(
    repo_url: str,
    selected_files: List[str],
    current_progress: Dict,
    *,
    branch: str = "main",
    batch_size: int = 25,
):
    """Step 1: Load files from GitHub.

    Fetches ``selected_files`` from the repository in batches via
    ``load_github_files`` and records the outcome in ``current_progress``.

    Args:
        repo_url: ``owner/repo`` or a GitHub URL such as
            ``https://github.com/owner/repo``. Surrounding whitespace, an
            optional scheme, a ``www.`` host prefix, trailing slashes and a
            ``.git`` suffix are tolerated.
        selected_files: Repository-relative paths of the files to load.
        current_progress: Progress-state dict; updated in place and returned
            once loading has started.
        branch: Branch passed to ``load_github_files`` (default ``"main"``).
        batch_size: Number of files requested per batch (values below 1 are
            treated as 1).

    Returns:
        A fresh error dict when validation fails (no files selected or the
        repository is not of the form ``owner/repo``); ``current_progress`` is
        left untouched in that case. Otherwise ``current_progress`` itself,
        with ``status`` set to ``"loaded"`` (documents stored under
        ``loaded_documents``) or ``"error"``.
    """
    print("\n🔄 STARTING FILE LOADING STEP")
    print(f"📍 Repository: {repo_url}")
    print(f"📋 Selected files: {selected_files}")

    if not selected_files:
        return {
            "status": "error",
            "message": "❌ No files selected for loading",
            "progress": 0,
            "details": "",
            "step": "file_loading",
        }

    total_files = len(selected_files)
    start_time = time.time()

    # Normalise the input down to "owner/repo". Prefixes are matched
    # case-insensitively but the remainder keeps its original casing.
    repo_name = (repo_url or "").strip()
    for scheme in ("https://", "http://"):
        if repo_name.lower().startswith(scheme):
            repo_name = repo_name[len(scheme):]
            break
    for host in ("www.github.com/", "github.com/"):
        if repo_name.lower().startswith(host):
            repo_name = repo_name[len(host):]
            break
    repo_name = repo_name.strip("/")
    if repo_name.endswith(".git"):
        repo_name = repo_name[: -len(".git")]

    # Validate for every input form, not only for full URLs: both an owner
    # and a repository part must be present.
    owner, _, repo = repo_name.partition("/")
    if not owner or not repo:
        return {
            "status": "error",
            "message": "❌ Invalid repository URL format",
            "progress": 0,
            "details": "",
            "step": "file_loading",
        }

    try:
        batch_size = max(1, batch_size)  # range() rejects a zero step
        github_token = os.getenv("GITHUB_API_KEY")  # loop-invariant lookup
        all_documents = []
        all_failed = []

        current_progress.update(
            {
                "status": "loading",
                "message": f"🚀 Loading files from {repo_name}",
                "progress": 0,
                "total_files": total_files,
                "processed_files": 0,
                "phase": "File Loading",
                "details": f"Processing {total_files} files in batches...",
                "step": "file_loading",
            }
        )

        for i in range(0, total_files, batch_size):
            batch = selected_files[i : i + batch_size]
            batch_number = i // batch_size + 1

            print(f"\n📦 PROCESSING BATCH {batch_number}")
            print(f"   Files: {batch}")

            # Update progress for current batch
            current_progress.update(
                {
                    "progress": (i / total_files) * 100,
                    "processed_files": i,
                    "current_batch": batch_number,
                    "details": f"Loading batch {batch_number}: {', '.join([f.split('/')[-1] for f in batch])}",
                }
            )

            try:
                documents, failed = load_github_files(
                    repo_name=repo_name,
                    file_paths=batch,
                    branch=branch,
                    concurrent_requests=10,
                    github_token=github_token,
                )

                print("✅ Load results:")
                print(f"   - Documents: {len(documents)}")
                print(f"   - Failed: {len(failed)}")

                for j, doc in enumerate(documents, start=1):
                    print(f"   📄 Doc {j}: {doc.doc_id}")
                    print(f"      Size: {len(doc.text)} chars")

                    # Ensure repo metadata is set
                    if "repo" not in doc.metadata:
                        doc.metadata["repo"] = repo_name
                        print(f"      ✅ Added repo metadata: {repo_name}")

                all_documents.extend(documents)
                all_failed.extend(failed)

            except Exception as batch_error:
                # Best effort: a failing batch marks its files as failed and
                # loading continues with the next batch.
                print(f"❌ Batch processing error: {batch_error}")
                all_failed.extend(batch)

        loading_time = time.time() - start_time

        # Store loaded documents in progress state for next step
        current_progress.update(
            {
                "status": "loaded",
                "message": f"✅ File Loading Complete! Loaded {len(all_documents)} documents",
                "progress": 100,
                "phase": "Files Loaded",
                "details": f"Successfully loaded {len(all_documents)} documents in {loading_time:.1f}s",
                "step": "file_loading_complete",
                "loaded_documents": all_documents,  # Store documents for next step
                "failed_files": all_failed,
                "loading_time": loading_time,
                "repo_name": repo_name,
            }
        )

        return current_progress

    except Exception as e:
        total_time = time.time() - start_time
        error_msg = f"❌ File loading error after {total_time:.1f}s: {str(e)}"
        print(error_msg)

        current_progress.update(
            {
                "status": "error",
                "message": error_msg,
                "progress": 0,
                "phase": "Failed",
                "details": str(e),
                "error": str(e),
                "step": "file_loading",
            }
        )

        return current_progress


def start_vector_ingestion(current_progress: Dict):
    """Step 2: Ingest loaded documents into vector store.

    Expects ``current_progress`` to be the state left behind by a completed
    file-loading step (``step == "file_loading_complete"`` with the documents
    under ``loaded_documents``), then runs ``ingest_documents_async`` to
    completion.

    Args:
        current_progress: Progress-state dict; updated in place and returned
            once ingestion has started.

    Returns:
        A fresh error dict when the precondition is not met or there are no
        documents. Otherwise ``current_progress`` itself, with ``status`` set
        to ``"complete"`` or ``"error"``.
    """
    print("\n🔄 STARTING VECTOR INGESTION STEP")

    # Check if we have loaded documents from previous step
    if current_progress.get("step") != "file_loading_complete":
        return {
            "status": "error",
            "message": "❌ No loaded documents found. Please load files first.",
            "progress": 0,
            "details": "",
            "step": "vector_ingestion",
        }

    all_documents = current_progress.get("loaded_documents", [])
    repo_name = current_progress.get("repo_name", "")

    if not all_documents:
        return {
            "status": "error",
            "message": "❌ No documents available for vector ingestion",
            "progress": 0,
            "details": "",
            "step": "vector_ingestion",
        }

    # ``failed_files`` may be a list of paths or an already-computed count;
    # it is not modified by ingestion, so resolve it once for both outcomes.
    failed_files_data = current_progress.get("failed_files", [])
    if isinstance(failed_files_data, list):
        failed_files_count = len(failed_files_data)
    elif isinstance(failed_files_data, int):
        failed_files_count = failed_files_data
    else:
        failed_files_count = 0

    vector_start_time = time.time()

    # Update state for vector store phase
    current_progress.update(
        {
            "status": "vectorizing",
            "message": "🔄 Generating embeddings and storing in vector database",
            "progress": 0,
            "phase": "Vector Store Ingestion",
            "details": f"Processing {len(all_documents)} documents for embedding...",
            "step": "vector_ingestion",
            # Read by the progress formatter for the vector-processing section.
            "documents_count": len(all_documents),
        }
    )

    try:
        print("🔄 STARTING VECTOR STORE INGESTION")
        print(f"   Repository: {repo_name}")
        print(f"   Documents to process: {len(all_documents)}")

        # asyncio.run creates a fresh loop, runs the coroutine, then closes
        # the loop and clears it as the thread's current loop, so no closed
        # loop is left installed for later asyncio users in this thread.
        asyncio.run(ingest_documents_async(all_documents, repo_name))

        vector_time = time.time() - vector_start_time
        loading_time = current_progress.get("loading_time", 0)
        total_time = loading_time + vector_time

        print(f"✅ Vector ingestion completed in {vector_time:.2f} seconds")

        # Update final success state with repository update flag
        current_progress.update(
            {
                "status": "complete",
                "message": "✅ Complete Ingestion Pipeline Finished!",
                "progress": 100,
                "phase": "Complete",
                "details": f"Successfully processed {len(all_documents)} documents for {repo_name}",
                "step": "complete",
                "total_time": total_time,
                "documents_processed": len(all_documents),
                "failed_files_count": failed_files_count,
                "failed_files": failed_files_data,  # Keep original data
                "vector_time": vector_time,
                "loading_time": loading_time,
                "repo_name": repo_name,
                "repository_updated": True,  # Flag to trigger repo list refresh
            }
        )

        return current_progress

    except Exception as ingest_error:
        vector_time = time.time() - vector_start_time
        print(f"❌ Vector ingestion failed after {vector_time:.2f} seconds")
        print(f"❌ Error: {ingest_error}")

        current_progress.update(
            {
                "status": "error",
                "message": "❌ Vector Store Ingestion Failed",
                "progress": 0,
                "phase": "Failed",
                "details": f"Error: {str(ingest_error)}",
                "error": str(ingest_error),
                "step": "vector_ingestion",
                "failed_files_count": failed_files_count,
                "failed_files": failed_files_data,
            }
        )

        return current_progress


def start_file_loading_generator(
    repo_url: str,
    selected_files: List[str],
    current_progress: Dict,
    *,
    branch: str = "main",
    batch_size: int = 10,
):
    """Step 1: Load files from GitHub with yield-based real-time updates.

    Generator variant of the file-loading step: a fresh progress dict is
    yielded before loading starts, at the start and end of every batch, and
    once more on completion or failure.

    Args:
        repo_url: ``owner/repo`` or a GitHub URL such as
            ``https://github.com/owner/repo``. Surrounding whitespace, an
            optional scheme, a ``www.`` host prefix, trailing slashes and a
            ``.git`` suffix are tolerated.
        selected_files: Repository-relative paths of the files to load.
        current_progress: Accepted for signature compatibility; not read.
        branch: Branch passed to ``load_github_files`` (default ``"main"``).
        batch_size: Number of files requested per batch (values below 1 are
            treated as 1).

    Yields:
        Progress dicts. While loading, ``failed_files`` is a running count;
        in the final ``"loaded"`` dict it is the list of failed paths and the
        documents are stored under ``loaded_documents``.

    Returns:
        The last yielded dict (available as ``StopIteration.value``).
    """

    print("\n🔄 STARTING FILE LOADING STEP")
    print(f"📍 Repository: {repo_url}")
    print(f"📋 Selected files: {len(selected_files or [])} files")

    if not selected_files:
        error_progress = {
            "status": "error",
            "message": "❌ No files selected for loading",
            "progress": 0,
            "details": "Please select at least one file to proceed.",
            "step": "file_loading",
        }
        yield error_progress
        return error_progress

    total_files = len(selected_files)
    start_time = time.time()

    # Normalise the input down to "owner/repo". Prefixes are matched
    # case-insensitively but the remainder keeps its original casing.
    repo_name = (repo_url or "").strip()
    for scheme in ("https://", "http://"):
        if repo_name.lower().startswith(scheme):
            repo_name = repo_name[len(scheme):]
            break
    for host in ("www.github.com/", "github.com/"):
        if repo_name.lower().startswith(host):
            repo_name = repo_name[len(host):]
            break
    repo_name = repo_name.strip("/")
    if repo_name.endswith(".git"):
        repo_name = repo_name[: -len(".git")]

    # Validate for every input form, not only for full URLs.
    owner, _, repo = repo_name.partition("/")
    if not owner or not repo:
        error_progress = {
            "status": "error",
            "message": "❌ Invalid repository URL format",
            "progress": 0,
            "details": "Expected format: owner/repo or https://github.com/owner/repo",
            "step": "file_loading",
        }
        yield error_progress
        return error_progress

    try:
        batch_size = max(1, batch_size)  # range() rejects a zero step
        total_batches = (total_files + batch_size - 1) // batch_size
        github_token = os.getenv("GITHUB_API_KEY")  # loop-invariant lookup
        all_documents = []
        all_failed = []

        # Fields shared by every in-flight update. Each yield is a standalone
        # dict, so the totals must be repeated for the display to show the
        # per-file status section on every update.
        common = {
            "status": "loading",
            "phase": "File Loading",
            "step": "file_loading",
            "repo_name": repo_name,
            "total_files": total_files,
            "total_batches": total_batches,
        }

        # Initial progress update
        yield {
            **common,
            "message": f"🚀 Starting file loading from {repo_name}",
            "progress": 0,
            "processed_files": 0,
            "successful_files": 0,
            "failed_files": 0,
            "details": f"Preparing to load {total_files} files in batches of {batch_size}...",
            "current_batch": 0,
        }

        # Short pause so the UI can render the initial state.
        time.sleep(0.5)

        for i in range(0, total_files, batch_size):
            batch = selected_files[i : i + batch_size]
            current_batch_num = i // batch_size + 1
            processed_after = i + len(batch)

            preview = ", ".join(path.split("/")[-1] for path in batch[:3])
            more = "..." if len(batch) > 3 else ""

            # Update progress at batch start (loading occupies 0-90%).
            yield {
                **common,
                "message": f"🔄 Loading batch {current_batch_num}/{total_batches}",
                "progress": (i / total_files) * 90,
                "processed_files": i,
                "successful_files": len(all_documents),
                "failed_files": len(all_failed),
                "current_batch": current_batch_num,
                "details": f"Processing batch {current_batch_num}: {preview}{more}",
            }

            try:
                print(f"\n📦 PROCESSING BATCH {current_batch_num}/{total_batches}")
                print(f"   Files: {[f.split('/')[-1] for f in batch]}")

                documents, failed = load_github_files(
                    repo_name=repo_name,
                    file_paths=batch,
                    branch=branch,
                    concurrent_requests=10,
                    github_token=github_token,
                )

                print("✅ Load results:")
                print(f"   - Documents: {len(documents)}")
                print(f"   - Failed: {len(failed)}")

                # Process documents
                for j, doc in enumerate(documents, start=1):
                    print(f"   📄 Doc {j}: {doc.doc_id}")
                    print(f"      Size: {len(doc.text)} chars")

                    if "repo" not in doc.metadata:
                        doc.metadata["repo"] = repo_name
                        print(f"      ✅ Added repo metadata: {repo_name}")

                all_documents.extend(documents)
                all_failed.extend(failed)

                # Update progress after batch completion
                yield {
                    **common,
                    "message": f"✅ Completed batch {current_batch_num}/{total_batches}",
                    "progress": (processed_after / total_files) * 90,
                    "processed_files": processed_after,
                    "successful_files": len(all_documents),
                    "failed_files": len(all_failed),
                    "current_batch": current_batch_num,
                    "details": f"✅ Batch {current_batch_num} complete: {len(documents)} loaded, {len(failed)} failed. Total progress: {len(all_documents)} documents loaded.",
                }

                time.sleep(0.3)

            except Exception as batch_error:
                # Best effort: a failing batch marks its files as failed and
                # loading continues with the next batch.
                print(f"❌ Batch processing error: {batch_error}")
                all_failed.extend(batch)

                yield {
                    **common,
                    "message": f"⚠️ Error in batch {current_batch_num}",
                    "progress": (processed_after / total_files) * 90,
                    "processed_files": processed_after,
                    "successful_files": len(all_documents),
                    "failed_files": len(all_failed),
                    "current_batch": current_batch_num,
                    "details": f"❌ Batch {current_batch_num} error: {str(batch_error)[:100]}... Continuing with next batch.",
                }

        loading_time = time.time() - start_time

        # The loader may report neither a document nor a failure for some
        # files; avoid dividing by zero in that case.
        attempted = len(all_documents) + len(all_failed)
        success_rate = (len(all_documents) / attempted * 100) if attempted else 0.0

        # Final completion update
        completion_progress = {
            "status": "loaded",
            "message": f"✅ File Loading Complete! Loaded {len(all_documents)} documents",
            "progress": 100,
            "phase": "Files Loaded Successfully",
            "details": f"🎯 Final Results:\n✅ Successfully loaded: {len(all_documents)} documents\n❌ Failed files: {len(all_failed)}\n⏱️ Total time: {loading_time:.1f}s\n📊 Success rate: {success_rate:.1f}%",
            "step": "file_loading_complete",
            "loaded_documents": all_documents,
            "failed_files": all_failed,
            "loading_time": loading_time,
            "repo_name": repo_name,
            "total_files": total_files,
            "processed_files": total_files,
            "successful_files": len(all_documents),
        }
        yield completion_progress
        return completion_progress

    except Exception as e:
        total_time = time.time() - start_time
        error_msg = f"❌ File loading error after {total_time:.1f}s: {str(e)}"
        print(error_msg)

        error_progress = {
            "status": "error",
            "message": error_msg,
            "progress": 0,
            "phase": "Loading Failed",
            "details": f"Critical error during file loading:\n{str(e)}",
            "error": str(e),
            "step": "file_loading",
        }
        yield error_progress
        return error_progress


# Progress display component
def format_progress_display(progress_state: Dict) -> str:
    """Format progress state into readable display with enhanced details.

    Args:
        progress_state: Progress dict as produced by the loading/ingestion
            steps. Keys read: ``status``, ``message``, ``progress``, ``phase``,
            ``details``, ``step`` plus step-specific counters. Missing keys
            fall back to neutral defaults.

    Returns:
        Markdown-flavoured text: headline, 40-character progress bar,
        step-specific counters, details, and a summary for the ``complete``
        or ``error`` status. An empty/falsy state yields the idle banner.
    """
    if not progress_state:
        return "🚀 Ready to start ingestion...\n\n📋 **Two-Step Process:**\n1️⃣ Load files from GitHub repository\n2️⃣ Generate embeddings and store in vector database"

    def _as_count(value) -> int:
        # Counters may arrive as a collection of items or as a plain number.
        if isinstance(value, (list, tuple, set)):
            return len(value)
        return value if isinstance(value, int) else 0

    status = progress_state.get("status", "unknown")
    message = progress_state.get("message", "")
    progress = progress_state.get("progress", 0) or 0
    phase = progress_state.get("phase", "")
    details = progress_state.get("details", "")
    step = progress_state.get("step")

    # Enhanced progress bar: 40 chars total, 2.5% per char. Clamp so that
    # out-of-range progress values cannot produce a bar of the wrong length.
    bar_width = 40
    filled = max(0, min(bar_width, int(progress / 2.5)))
    progress_bar = "█" * filled + "░" * (bar_width - filled)

    # Status emoji mapping
    status_emoji = {
        "loading": "⏳",
        "loaded": "✅",
        "vectorizing": "🧠",
        "complete": "🎉",
        "error": "❌",
    }
    emoji = status_emoji.get(status, "🔄")

    parts = [
        f"{emoji} **{message}**\n\n",
        # Phase and progress section
        f"📊 **Current Phase:** {phase}\n",
        f"📈 **Progress:** {progress:.1f}%\n",
        f"[{progress_bar}] {progress:.1f}%\n\n",
    ]

    # Step-specific details for file loading
    if step == "file_loading":
        processed = progress_state.get("processed_files", 0)
        total = progress_state.get("total_files", 0)
        successful = progress_state.get("successful_files", 0)
        failed = _as_count(progress_state.get("failed_files", 0))

        if total > 0:
            parts.append("📁 **File Processing Status:**\n")
            parts.append(f"   • Total files: {total}\n")
            parts.append(f"   • Processed: {processed}/{total}\n")
            parts.append(f"   • ✅ Successful: {successful}\n")
            parts.append(f"   • ❌ Failed: {failed}\n")

            if "current_batch" in progress_state and "total_batches" in progress_state:
                parts.append(
                    f"   • 📦 Current batch: {progress_state['current_batch']}/{progress_state['total_batches']}\n"
                )
            parts.append("\n")

    # Step-specific details for vector ingestion
    elif step == "vector_ingestion":
        docs_count = progress_state.get("documents_count")
        if docs_count is None:
            # Fall back to the documents carried over from the loading step.
            docs_count = _as_count(progress_state.get("loaded_documents"))
        repo_name = progress_state.get("repo_name", "Unknown")

        if docs_count > 0:
            parts.append("🧠 **Vector Processing Status:**\n")
            parts.append(f"   • Repository: {repo_name}\n")
            parts.append(f"   • Documents: {docs_count:,}\n")
            parts.append(f"   • Stage: {phase}\n\n")

    # Detailed information
    parts.append(f"📝 **Details:**\n{details}\n")

    # Final summary for completion
    if status == "complete":
        total_time = progress_state.get("total_time", 0) or 0
        docs_processed = progress_state.get("documents_processed", 0)
        failed_files = progress_state.get("failed_files", 0)
        vector_time = progress_state.get("vector_time", 0)
        loading_time = progress_state.get("loading_time", 0)
        repo_name = progress_state.get("repo_name", "Unknown")

        # A missing or zero duration must not crash the summary.
        rate = docs_processed / total_time if total_time > 0 else 0.0

        parts.append("\n🎊 **INGESTION COMPLETED SUCCESSFULLY!**\n")
        parts.append("━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\n")
        parts.append(f"🎯 **Repository:** {repo_name}\n")
        parts.append(f"📄 **Documents processed:** {docs_processed:,}\n")
        parts.append(
            f"❌ **Failed files:** {len(failed_files) if isinstance(failed_files, list) else failed_files}\n"
        )
        parts.append(f"⏱️ **Total time:** {total_time:.1f} seconds\n")
        parts.append(f"   ├─ File loading: {loading_time:.1f}s\n")
        parts.append(f"   └─ Vector processing: {vector_time:.1f}s\n")
        parts.append(f"📊 **Processing rate:** {rate:.1f} docs/second\n\n")
        parts.append(
            "🚀 **Next Step:** Go to the 'Query Interface' tab to start asking questions!"
        )

    elif status == "error":
        # Coerce to text so slicing is safe for non-string error payloads.
        error = str(progress_state.get("error") or "Unknown error")
        parts.append("\n💥 **ERROR OCCURRED**\n")
        parts.append("━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\n")
        parts.append(
            f"❌ **Error Details:** {error[:300]}{'...' if len(error) > 300 else ''}\n"
        )
        parts.append("\n🔧 **Troubleshooting Tips:**\n")
        parts.append("   • Check your GitHub token permissions\n")
        parts.append("   • Verify repository URL format\n")
        parts.append("   • Ensure selected files exist\n")
        parts.append("   • Check network connectivity\n")

    return "".join(parts)


# Create the main Gradio interface
with gr.Blocks(title="Doc-MCP") as demo:
    gr.Markdown("# 📚Doc-MCP: Documentation RAG System")
    gr.Markdown(
        "Transform GitHub documentation repositories into accessible MCP (Model Context Protocol) servers for AI agents. Upload documentation, generate vector embeddings, and query with intelligent context retrieval."
    )

    # State variables
    files_state = gr.State([])
    progress_state = gr.State({})

    with gr.Tabs():
        with gr.TabItem("📥 Documentation Ingestion"):
            gr.Markdown("### 🚀 Two-Step Documentation Processing Pipeline")
            gr.Markdown(
                "**Step 1:** Fetch markdown files from GitHub repository → **Step 2:** Generate vector embeddings and store in MongoDB Atlas"
            )

            with gr.Row():
                with gr.Column(scale=2):
                    repo_input = gr.Textbox(
                        label="📂 GitHub Repository URL",
                        placeholder="Enter: owner/repo or https://github.com/owner/repo (e.g., gradio-app/gradio)",
                        value="",
                        info="Enter any GitHub repository containing markdown documentation",
                    )
                    load_btn = gr.Button(
                        "🔍 Discover Documentation Files", variant="secondary"
                    )

                with gr.Column(scale=1):
                    status_output = gr.Textbox(
                        label="Repository Discovery Status",
                        interactive=False,
                        lines=4,
                        placeholder="Repository scanning results will appear here...",
                    )
            with gr.Row():
                select_all_btn = gr.Button(
                    "📋 Select All Documents", variant="secondary"
                )
                clear_all_btn = gr.Button("🗑️ Clear Selection", variant="secondary")

            # File selection
            with gr.Accordion(label="Available Documentation Files"):
                file_selector = gr.CheckboxGroup(
                    choices=[],
                    label="Select Markdown Files for RAG Processing",
                    visible=False,
                )

            # Two-step ingestion controls
            gr.Markdown("### 🔄 RAG Pipeline Execution")
            gr.Markdown(
                "Process your documentation through our advanced RAG pipeline using Nebius AI embeddings and MongoDB Atlas vector storage."
            )

            with gr.Row():
                with gr.Column():
                    step1_btn = gr.Button(
                        "📥 Step 1: Load Files from GitHub",
                        variant="primary",
                        size="lg",
                        interactive=False,
                    )

                with gr.Column():
                    step2_btn = gr.Button(
                        "🔄 Step 2: Start Ingestion",
                        variant="primary",
                        size="lg",
                        interactive=False,
                    )

            with gr.Row():
                refresh_btn = gr.Button("🔄 Refresh Progress", variant="secondary")
                reset_btn = gr.Button("🗑️ Reset Progress", variant="secondary")

            # Progress display
            progress_display = gr.Textbox(
                label="📊 Real-time Ingestion Progress",
                interactive=False,
                lines=25,
                value="🚀 Ready to start two-step ingestion process...\n\n📋 Steps:\n1️⃣ Load files from GitHub repository\n2️⃣ Generate embeddings and store in vector database",
                max_lines=30,
            )

            # Event handlers
            def load_files_handler(repo_url: str):
                """Discover files for ``repo_url`` and refresh the selection UI.

                Returns a 5-tuple matching the wired outputs: the file
                checkbox group, the status text, the list of available files,
                and the Step 1 / Step 2 buttons.
                """

                def _no_files(status_text):
                    # Hidden selector, empty file list, both step buttons off.
                    return (
                        gr.CheckboxGroup(choices=[], visible=False),
                        status_text,
                        [],
                        gr.Button(interactive=False),
                        gr.Button(interactive=False),
                    )

                if not repo_url.strip():
                    return _no_files("Please enter a repository URL")

                files, message = fetch_files_with_loader(repo_url)
                if not files:
                    return _no_files(message)

                selector = gr.CheckboxGroup(
                    choices=files,
                    value=[],
                    label=f"Select Files from {repo_url} ({len(files)} files)",
                    visible=True,
                )
                step1_enabled = gr.Button(interactive=True)  # Enable step 1 button
                step2_disabled = gr.Button(interactive=False)  # Keep step 2 disabled
                return selector, message, files, step1_enabled, step2_disabled

            def start_step1_generator(
                repo_url: str, selected_files: List[str], current_progress: Dict
            ):
                """Run Step 1 and stream each progress update to the UI.

                For every update from ``start_file_loading_generator`` this
                yields the new state, its rendered text, and the Step 2
                button, which becomes interactive only once the update's step
                is ``file_loading_complete``.
                """
                updates = start_file_loading_generator(
                    repo_url, selected_files, current_progress.copy()
                )
                for state in updates:
                    rendered = format_progress_display(state)
                    loading_done = state.get("step") == "file_loading_complete"
                    yield state, rendered, gr.Button(interactive=loading_done)

            def start_step2(current_progress: Dict):
                """Run Step 2 (vector ingestion) on a shallow copy of the state.

                Returns the resulting state together with its rendered text.
                """
                updated_state = start_vector_ingestion(current_progress.copy())
                return updated_state, format_progress_display(updated_state)

            def refresh_progress(current_progress: Dict):
                """Re-render the progress text from the stored state."""
                return format_progress_display(current_progress)

            def reset_progress():
                """Clear the progress state and disable the Step 2 button.

                Returns the empty state, the idle message, and a
                non-interactive button, in that order.
                """
                cleared_state = {}
                idle_text = "Ready to start two-step ingestion process..."
                step2_disabled = gr.Button(interactive=False)
                return cleared_state, idle_text, step2_disabled

            def select_all_handler(available_files):
                """Tick every available file; an empty/missing list selects nothing."""
                chosen = available_files or []
                return gr.CheckboxGroup(value=chosen)

            def clear_all_handler():
                """Untick every file in the selector."""
                nothing_selected = []
                return gr.CheckboxGroup(value=nothing_selected)

            # Wire up events
            load_btn.click(
                fn=load_files_handler,
                inputs=[repo_input],
                outputs=[
                    file_selector,
                    status_output,
                    files_state,
                    step1_btn,
                    step2_btn,
                ],
                show_api=False,
            )

            select_all_btn.click(
                fn=select_all_handler,
                inputs=[files_state],
                outputs=[file_selector],
                show_api=False,
            )

            clear_all_btn.click(
                fn=clear_all_handler, outputs=[file_selector], show_api=False
            )

            step1_btn.click(
                fn=start_step1_generator,
                inputs=[repo_input, file_selector, progress_state],
                outputs=[progress_state, progress_display, step2_btn],
                show_api=False,
            )

            step2_btn.click(
                fn=start_step2,
                inputs=[progress_state],
                outputs=[progress_state, progress_display],
                show_api=False,
            )

            refresh_btn.click(
                fn=refresh_progress,
                inputs=[progress_state],
                outputs=[progress_display],
                show_api=False,
            )

            reset_btn.click(
                fn=reset_progress,
                outputs=[progress_state, progress_display, step2_btn],
                show_api=False,
            )

        # ================================
        # Tab 2: Query Interface
        # ================================
        with gr.TabItem("🤖 AI Documentation Assistant"):
            # Layout of the query tab: a left column with the repository picker,
            # search options, query box and answer, and a right column with the
            # source citations. Components are created in display order.
            gr.Markdown("### 💬 Intelligent Documentation Q&A")
            gr.Markdown(
                "Query your processed documentation using advanced semantic search. Get contextual answers with source citations powered by Nebius LLM and vector similarity search."
            )

            with gr.Row():
                with gr.Column(scale=2):
                    # Repository selection - Dropdown that becomes textbox when selected
                    with gr.Row():
                        # Choices are read once while the UI is being built; the
                        # placeholder entry is used when the list is empty.
                        repo_dropdown = gr.Dropdown(
                            choices=get_available_repositories()
                            or ["No repositories available"],
                            label="📚 Select Documentation Repository",
                            value=None,
                            interactive=True,
                            allow_custom_value=True,
                            info="Choose from available repositories",
                        )

                        # Hidden textbox that will become visible when repo is selected
                        selected_repo_textbox = gr.Textbox(
                            label="🎯 Selected Repository",
                            value="",
                            interactive=False,
                            visible=False,
                            info="Currently selected repository for querying",
                        )

                    refresh_repos_btn = gr.Button(
                        "🔄 Refresh Repository List", variant="secondary", size="sm"
                    )

                    # Query mode selection
                    query_mode = gr.Radio(
                        choices=["default", "text_search", "hybrid"],
                        label="🔍 Search Strategy",
                        value="default",
                        info="• default: Semantic similarity (AI understanding)\n• text_search: Keyword matching\n• hybrid: Combined approach for best results",
                    )

                    # Query input
                    query_input = gr.Textbox(
                        label="💭 Ask About Your Documentation",
                        placeholder="How do I implement a custom component? What are the available API endpoints? How to configure the system?",
                        lines=3,
                        info="Ask natural language questions about your documentation",
                    )

                    query_btn = gr.Button(
                        "🚀 Search Documentation", variant="primary", size="lg"
                    )

                    # Response display as text area
                    response_output = gr.Textbox(
                        label="🤖 AI Assistant Response",
                        value="Your AI-powered documentation response will appear here with contextual information and source citations...",
                        lines=10,
                        interactive=False,
                        info="Generated using Nebius LLM with retrieved documentation context",
                    )

                with gr.Column(scale=2):
                    gr.Markdown("### 📖 Source References")
                    gr.Markdown(
                        "View the exact documentation sources used to generate the response, with relevance scores and GitHub links."
                    )

                    # Source nodes display as JSON; the initial value is a
                    # placeholder shown until the first query completes.
                    sources_output = gr.JSON(
                        label="📎 Source Citations & Metadata",
                        value={
                            "message": "Source documentation excerpts with relevance scores will appear here after your query...",
                            "info": "Each source includes file path, relevance score, and content snippet",
                        },
                    )

            # Event handlers
            def handle_repo_selection(selected_repo):
                """Swap the dropdown for a read-only textbox once a real repository is picked.

                Args:
                    selected_repo: Current dropdown value. May be None, blank, or one
                        of the placeholder entries the dropdown shows when no
                        repositories could be listed.

                Returns:
                    Tuple of component updates for (dropdown, selected-repository
                    textbox, query button).
                """
                # Entries the dropdown can contain that are messages, not repositories.
                # They must never unlock the query button.
                placeholder_choices = (
                    "No repositories available",
                    "No repositories available - Please ingest documentation first",
                    "Error loading repositories",
                )

                # The dropdown allows custom values, so guard against padded/blank text.
                choice = (
                    selected_repo.strip()
                    if isinstance(selected_repo, str)
                    else selected_repo
                )

                if not choice or choice in placeholder_choices:
                    return (
                        gr.Dropdown(visible=True),  # Keep dropdown visible
                        gr.Textbox(visible=False, value=""),  # Hide textbox
                        gr.Button(interactive=False),  # Disable query button
                    )

                return (
                    gr.Dropdown(visible=False),  # Hide dropdown
                    gr.Textbox(visible=True, value=choice),  # Show the selection
                    gr.Button(interactive=True),  # Enable query button
                )

            def reset_repo_selection():
                """Return the repository picker to its initial dropdown view.

                Returns:
                    Tuple of component updates for (dropdown, selected-repository
                    textbox, query button): the dropdown is shown with freshly read
                    choices and no selection, the textbox is hidden and emptied, and
                    the query button is disabled. If reading the repository list
                    fails, the dropdown shows a single error entry instead.
                """
                try:
                    refreshed_dropdown = gr.Dropdown(
                        choices=get_available_repositories()
                        or ["No repositories available"],
                        value=None,
                        visible=True,
                    )
                except Exception as e:
                    print(f"Error refreshing repository list: {e}")
                    refreshed_dropdown = gr.Dropdown(
                        choices=["Error loading repositories"],
                        value=None,
                        visible=True,
                    )

                # The textbox and button end up in the same state on both paths.
                return (
                    refreshed_dropdown,
                    gr.Textbox(visible=False, value=""),
                    gr.Button(interactive=False),
                )

            def get_available_docs_repo():
                """
                List the available docs of repositories - should be called first to list out all the available repo docs to chat with

                Returns:
                    Updated dropdown with available repositories (or a single
                    placeholder entry when none exist or the lookup fails)
                """
                try:
                    # An empty/falsy result is replaced by an explanatory placeholder.
                    listing = get_available_repositories() or [
                        "No repositories available - Please ingest documentation first"
                    ]
                    dropdown_update = gr.Dropdown(choices=listing, value=None)
                except Exception as e:
                    print(f"Error refreshing repository list: {e}")
                    dropdown_update = gr.Dropdown(
                        choices=["Error loading repositories"], value=None
                    )
                return dropdown_update

            # Simple query handler
            def handle_query(repo: str, mode: str, query: str):
                """
                Handle query request - returns raw data from retriever
                Args:
                    repo: Selected repository from textbox
                    mode: Query mode (default, text_search, hybrid)
                    query: User's query
                Returns:
                    Raw result dict from QueryRetriever.make_query(), or a dict with a
                    single "error" key when the input is invalid or the query fails
                """
                # API callers can send None as well as blank text.
                if not query or not query.strip():
                    return {"error": "Please enter a query."}

                # Every placeholder/error entry the repository dropdown can show;
                # none of them names a real repository.
                placeholder_choices = (
                    "No repositories available",
                    "No repositories available - Please ingest documentation first",
                    "Error loading repositories",
                )
                repo_name = repo.strip() if isinstance(repo, str) else repo
                if not repo_name or repo_name in placeholder_choices:
                    return {"error": "Please select a valid repository."}

                try:
                    # Create query retriever for the selected repo and return the
                    # raw result of the query
                    return QueryRetriever(repo_name).make_query(query, mode)

                except Exception as e:
                    # Report the failure to the caller instead of crashing the UI;
                    # the traceback goes to the server log.
                    print(f"Query error: {e}")
                    traceback.print_exc()
                    return {"error": f"Query failed: {str(e)}"}

            def make_query(repo: str, mode: str, query: str):
                """
                Retrieve relevant documentation context for a given query using specified retrieval mode.

                This function is designed to support Retrieval-Augmented Generation (RAG) by extracting
                the most relevant context chunks from indexed documentation sources.
                Args:
                    repo: Selected repository from the textbox input
                    mode: Query mode (default, text_search, hybrid)
                    query: User's query
                Returns:
                    Tuple of (response_text, source_nodes_json)
                """
                outcome = handle_query(repo, mode, query)

                # Success path: pull the two display fields, with fallbacks when the
                # retriever result lacks them.
                if "error" not in outcome:
                    return (
                        outcome.get("response", "No response available"),
                        outcome.get("source_nodes", []),
                    )

                # Failure path: surface the same message in both output components.
                failure = outcome["error"]
                return f"Error: {failure}", {"error": failure}

            # Wire up events

            # Handle repository selection from dropdown: the handler's three return
            # values update the dropdown, the selected-repo textbox and the query
            # button, in that order.
            repo_dropdown.change(
                fn=handle_repo_selection,
                inputs=[repo_dropdown],
                outputs=[repo_dropdown, selected_repo_textbox, query_btn],
                show_api=False,
            )

            # Handle refresh button - resets to dropdown view
            refresh_repos_btn.click(
                fn=reset_repo_selection,
                outputs=[repo_dropdown, selected_repo_textbox, query_btn],
                show_api=False,
            )

            # Also provide API endpoint for listing repositories.
            # NOTE(review): this is a second listener on the same button and it also
            # writes to repo_dropdown, so one click produces two updates of the
            # dropdown. The two handlers use different placeholder texts when no
            # repositories exist - confirm which one is meant to win.
            refresh_repos_btn.click(
                fn=get_available_docs_repo,
                outputs=[repo_dropdown],
                api_name="list_available_docs",
            )

            # Query button uses the textbox value (not dropdown)
            query_btn.click(
                fn=make_query,
                inputs=[
                    selected_repo_textbox,
                    query_mode,
                    query_input,
                ],  # Use textbox, not dropdown
                outputs=[response_output, sources_output],
                api_name="query_documentation",
            )

            # Also allow Enter key to trigger query; same handler and components as
            # the button, but registered without a named API endpoint.
            query_input.submit(
                fn=make_query,
                inputs=[
                    selected_repo_textbox,
                    query_mode,
                    query_input,
                ],  # Use textbox, not dropdown
                outputs=[response_output, sources_output],
                show_api=False,
            )

        # ================================
        # Tab 3: Repository Management
        # ================================
        with gr.TabItem("🗂️ Repository Management", visible=ENABLE_REPO_MANAGEMENT):
            # Layout of the management tab: statistics and repository table on top,
            # the delete controls and their status box below.
            gr.Markdown(
                "Manage your ingested repositories - view details and delete repositories when needed."
            )

            with gr.Row():
                with gr.Column(scale=1):
                    gr.Markdown("### 📊 Repository Statistics")
                    stats_display = gr.JSON(
                        label="Database Statistics",
                        value={"message": "Click refresh to load statistics..."},
                    )
                    refresh_stats_btn = gr.Button(
                        "🔄 Refresh Statistics", variant="secondary"
                    )

                with gr.Column(scale=2):
                    gr.Markdown("### 📋 Repository Details")
                    repos_table = gr.Dataframe(
                        headers=["Repository", "Files", "Last Updated"],
                        datatype=["str", "number", "str"],
                        label="Ingested Repositories",
                        interactive=False,
                        wrap=True,
                    )
                    # NOTE(review): this rebinds the name `refresh_repos_btn`, which
                    # the query tab already uses for its own refresh button. The
                    # query tab's listeners were registered before this line, so
                    # they stay attached to the earlier button; a distinct name
                    # would be clearer.
                    refresh_repos_btn = gr.Button(
                        "🔄 Refresh Repository List", variant="secondary"
                    )

            gr.Markdown("### 🗑️ Delete Repository")
            gr.Markdown(
                "**⚠️ Warning:** This will permanently delete all documents and metadata for the selected repository."
            )

            with gr.Row():
                with gr.Column(scale=2):
                    # Starts empty; choices are filled in by the handlers wired to
                    # this dropdown further down.
                    delete_repo_dropdown = gr.Dropdown(
                        choices=[],
                        label="Select Repository to Delete",
                        value=None,
                        interactive=True,
                        allow_custom_value=False,
                    )

                    # Confirmation checkbox
                    confirm_delete = gr.Checkbox(
                        label="I understand this action cannot be undone", value=False
                    )

                    # Created disabled; enabled only by the state-check handler.
                    delete_btn = gr.Button(
                        "🗑️ Delete Repository",
                        variant="stop",
                        size="lg",
                        interactive=False,
                    )

                with gr.Column(scale=1):
                    deletion_status = gr.Textbox(
                        label="Deletion Status",
                        value="Select a repository and confirm to enable deletion.",
                        interactive=False,
                        lines=6,
                    )

            # Management functions
            def load_repository_stats():
                """Fetch overall repository statistics for the JSON stats panel.

                Returns:
                    Whatever get_repository_stats() returns, or a dict with a single
                    "error" key describing the failure if that call raises.
                """
                try:
                    return get_repository_stats()
                except Exception as exc:
                    return {"error": "Failed to load statistics: " + str(exc)}

            def load_repository_details():
                """Build the rows of the "Ingested Repositories" table.

                Returns:
                    A list of [repository, file count, last updated] rows. A single
                    placeholder row is returned when there are no repositories, and a
                    single error row (with the exception text in the last column)
                    when loading fails.
                """

                def _format_timestamp(value):
                    # Objects with strftime are rendered to the minute; the
                    # "Unknown" fallback is kept as-is; anything else is stringified.
                    if hasattr(value, "strftime"):
                        return value.strftime("%Y-%m-%d %H:%M")
                    return value if value == "Unknown" else str(value)

                try:
                    records = get_repo_details()

                    if not records:
                        return [["No repositories found", 0, "N/A"]]

                    return [
                        [
                            record.get("repo_name", "Unknown"),
                            record.get("file_count", 0),
                            _format_timestamp(record.get("last_updated", "Unknown")),
                        ]
                        for record in records
                    ]

                except Exception as exc:
                    return [["Error loading repositories", 0, str(exc)]]

            def update_delete_dropdown():
                """Refresh the delete dropdown's choices and clear its selection.

                Returns:
                    A dropdown update listing the available repositories, or an
                    empty list of choices if they cannot be read.
                """
                try:
                    refreshed = gr.Dropdown(
                        choices=get_available_repositories(), value=None
                    )
                except Exception as e:
                    print(f"Error updating delete dropdown: {e}")
                    refreshed = gr.Dropdown(choices=[], value=None)
                return refreshed

            def check_delete_button_state(repo_selected, confirmation_checked):
                """Make the delete button clickable only when a repository is chosen
                and the confirmation box is ticked."""
                can_delete = bool(repo_selected and confirmation_checked)
                return gr.Button(interactive=can_delete)

            def delete_repository(repo_name: str, confirmed: bool):
                """Delete the selected repository and report the outcome.

                Args:
                    repo_name: Repository chosen in the delete dropdown.
                    confirmed: State of the confirmation checkbox.

                Returns:
                    Tuple of (status message, dropdown update, checkbox update).
                    The dropdown is always refreshed from the current repository
                    list, so a failed or rejected deletion no longer empties it.
                    The checkbox is always reset so every deletion needs a fresh
                    confirmation.
                """
                if not repo_name:
                    return (
                        "❌ No repository selected.",
                        update_delete_dropdown(),
                        gr.Checkbox(value=False),
                    )

                if not confirmed:
                    return (
                        "❌ Please confirm deletion by checking the checkbox.",
                        update_delete_dropdown(),
                        gr.Checkbox(value=False),
                    )

                try:
                    # Perform deletion
                    result = delete_repository_data(repo_name)

                    # Prepare status message; the summary is only added on success
                    status_msg = result["message"]
                    if result["success"]:
                        record_deleted = (
                            "Yes" if result["repo_record_deleted"] else "No"
                        )
                        status_msg += (
                            "\n\n📊 Deletion Summary:"
                            f"\n- Vector documents removed: {result['vector_docs_deleted']}"
                            f"\n- Repository record deleted: {record_deleted}"
                            f"\n\n✅ Repository '{repo_name}' has been completely removed."
                        )

                    # Refresh the dropdown (drops the deleted repo) and reset the
                    # confirmation checkbox
                    return (
                        status_msg,
                        update_delete_dropdown(),
                        gr.Checkbox(value=False),
                    )

                except Exception as e:
                    print(f"Error deleting repository '{repo_name}': {e}")
                    # update_delete_dropdown handles its own errors, so it is safe
                    # to call here; it keeps the remaining repositories selectable.
                    return (
                        f"❌ Error deleting repository: {str(e)}",
                        update_delete_dropdown(),
                        gr.Checkbox(value=False),
                    )

            # Wire up management events (all UI-only: show_api=False)
            refresh_stats_btn.click(
                fn=load_repository_stats, outputs=[stats_display], show_api=False
            )

            # Here `refresh_repos_btn` is the button created in this tab's
            # "Repository Details" column (the name was rebound there).
            refresh_repos_btn.click(
                fn=load_repository_details, outputs=[repos_table], show_api=False
            )

            # Update delete dropdown when refreshing repos
            refresh_repos_btn.click(
                fn=update_delete_dropdown,
                outputs=[delete_repo_dropdown],
                show_api=False,
            )

            # Enable/disable delete button based on selection and confirmation.
            # Both components feed the same handler, so a change to either one
            # re-evaluates the button state from both current values.
            delete_repo_dropdown.change(
                fn=check_delete_button_state,
                inputs=[delete_repo_dropdown, confirm_delete],
                outputs=[delete_btn],
                show_api=False,
            )

            confirm_delete.change(
                fn=check_delete_button_state,
                inputs=[delete_repo_dropdown, confirm_delete],
                outputs=[delete_btn],
                show_api=False,
            )

            # Delete repository: writes the status text and also updates the
            # dropdown and the confirmation checkbox it reads from.
            delete_btn.click(
                fn=delete_repository,
                inputs=[delete_repo_dropdown, confirm_delete],
                outputs=[deletion_status, delete_repo_dropdown, confirm_delete],
                show_api=False,
            )

            # Populate the statistics, table and delete dropdown via `demo.load`.
            # NOTE(review): `demo` is presumably the enclosing gr.Blocks app; if so
            # these run when the page loads, not each time this tab is opened.
            demo.load(fn=load_repository_stats, outputs=[stats_display], show_api=False)

            demo.load(fn=load_repository_details, outputs=[repos_table], show_api=False)

            demo.load(
                fn=update_delete_dropdown,
                outputs=[delete_repo_dropdown],
                show_api=False,
            )

        # ================================
        # Tab 4: GitHub File Search (Hidden API)
        # ================================
        with gr.TabItem("🔍 GitHub File Search", visible=False):
            # Layout of the file-search tab: three independent input groups (list
            # files, get one file, get several files), each with its own
            # repository/branch fields, plus one shared JSON output at the bottom.
            gr.Markdown("### 🔧 GitHub Repository File Search API")
            gr.Markdown(
                "Pure API endpoints for GitHub file operations - all responses in JSON format"
            )

            with gr.Row():
                with gr.Column():
                    gr.Markdown("#### 📋 List Repository Files")

                    # Repository input for file operations
                    api_repo_input = gr.Textbox(
                        label="Repository URL",
                        placeholder="owner/repo or https://github.com/owner/repo",
                        value="",
                        info="GitHub repository to scan",
                    )

                    # Branch selection
                    api_branch_input = gr.Textbox(
                        label="Branch",
                        value="main",
                        placeholder="main",
                        info="Branch to search (default: main)",
                    )

                    # File extensions
                    api_extensions_input = gr.Textbox(
                        label="File Extensions (comma-separated)",
                        value=".md,.mdx",
                        placeholder=".md,.mdx,.txt",
                        info="File extensions to include",
                    )

                    # List files button
                    list_files_btn = gr.Button("📋 List Files", variant="primary")

                with gr.Column():
                    gr.Markdown("#### 📄 Get Single File")

                    # Single file inputs
                    single_repo_input = gr.Textbox(
                        label="Repository URL",
                        placeholder="owner/repo or https://github.com/owner/repo",
                        value="",
                        info="GitHub repository",
                    )

                    single_file_input = gr.Textbox(
                        label="File Path",
                        placeholder="docs/README.md",
                        value="",
                        info="Path to specific file in repository",
                    )

                    single_branch_input = gr.Textbox(
                        label="Branch",
                        value="main",
                        placeholder="main",
                        info="Branch name (default: main)",
                    )

                    # Get single file button
                    get_single_btn = gr.Button(
                        "📄 Get Single File", variant="secondary"
                    )

            with gr.Row():
                with gr.Column():
                    gr.Markdown("#### 📚 Get Multiple Files")

                    # Multiple files inputs
                    multiple_repo_input = gr.Textbox(
                        label="Repository URL",
                        placeholder="owner/repo or https://github.com/owner/repo",
                        value="",
                        info="GitHub repository",
                    )

                    # Paths are entered as one comma-separated string and split by
                    # the handler.
                    multiple_files_input = gr.Textbox(
                        label="File Paths (comma-separated)",
                        placeholder="README.md,docs/guide.md,api/overview.md",
                        value="",
                        lines=3,
                        info="Comma-separated list of file paths",
                    )

                    multiple_branch_input = gr.Textbox(
                        label="Branch",
                        value="main",
                        placeholder="main",
                        info="Branch name (default: main)",
                    )

                    # Get multiple files button
                    get_multiple_btn = gr.Button(
                        "📚 Get Multiple Files", variant="secondary"
                    )

            # Single JSON output for all operations
            gr.Markdown("### 📊 API Response")
            api_response_output = gr.JSON(
                label="JSON Response",
                value={
                    "message": "API responses will appear here",
                    "info": "Use the buttons above to interact with GitHub repositories",
                },
            )

            # Pure API Functions (JSON only responses)
            def list_repository_files(
                repo_url: str, branch: str = "main", extensions: str = ".md,.mdx"
            ):
                """
                List all files in a GitHub repository with specified extensions

                Args:
                    repo_url: GitHub repository URL or owner/repo format
                    branch: Branch name to search (default: main)
                    extensions: Comma-separated file extensions (default: .md,.mdx)

                Returns:
                    JSON response with file list and metadata
                """
                # API callers may send None or blank strings; normalise up front so
                # validation gives a clear message and a blank branch falls back to
                # the documented default.
                repo_url = (repo_url or "").strip()
                branch = (branch or "").strip() or "main"

                if not repo_url:
                    return {"success": False, "error": "Repository URL is required"}

                try:
                    # Parse extensions list, falling back to the defaults when the
                    # field is empty
                    ext_list = [
                        ext.strip()
                        for ext in (extensions or "").split(",")
                        if ext.strip()
                    ] or [".md", ".mdx"]

                    # Get files list
                    files, status_message = fetch_repository_files(
                        repo_url=repo_url,
                        file_extensions=ext_list,
                        github_token=os.getenv("GITHUB_API_KEY"),
                        branch=branch,
                    )

                    # Fields common to the success and the "nothing found" response
                    response = {
                        "repository": repo_url,
                        "branch": branch,
                        "extensions": ext_list,
                    }

                    if files:
                        return {
                            "success": True,
                            **response,
                            "total_files": len(files),
                            "files": files,
                            "status": status_message,
                        }

                    return {
                        "success": False,
                        **response,
                        "total_files": 0,
                        "files": [],
                        "error": status_message or "No files found",
                    }

                except Exception as e:
                    return {
                        "success": False,
                        "error": f"Failed to list files: {str(e)}",
                        "repository": repo_url,
                        "branch": branch,
                    }

            def get_single_file(repo_url: str, file_path: str, branch: str = "main"):
                """
                Retrieve a single file from GitHub repository

                Args:
                    repo_url: GitHub repository URL or owner/repo format
                    file_path: Path to the file in the repository
                    branch: Branch name (default: main)

                Returns:
                    JSON response with file content and metadata
                """
                # API callers may send None or blank strings; normalise before
                # validating so the caller gets a clear message.
                repo_url = (repo_url or "").strip()
                file_path = (file_path or "").strip()
                branch = (branch or "").strip() or "main"

                if not repo_url:
                    return {"success": False, "error": "Repository URL is required"}

                if not file_path:
                    return {"success": False, "error": "File path is required"}

                try:
                    # Reduce any GitHub URL form (with or without scheme, "www.",
                    # SSH style, deep links, ".git" suffix) to "owner/repo".
                    repo_name = repo_url
                    if "github.com" in repo_name:
                        after_host = repo_name.split("github.com", 1)[1].lstrip(":/")
                        repo_name = "/".join(after_host.strip("/").split("/")[:2])
                    if repo_name.endswith(".git"):
                        repo_name = repo_name[: -len(".git")]

                    # Load single file
                    documents, failed = load_github_files(
                        repo_name=repo_name,
                        file_paths=[file_path],
                        branch=branch,
                        github_token=os.getenv("GITHUB_API_KEY"),
                    )

                    if documents:
                        doc = documents[0]
                        return {
                            "success": True,
                            "repository": repo_name,
                            "branch": branch,
                            "file_path": file_path,
                            "file_name": doc.metadata.get("file_name", ""),
                            "file_size": len(doc.text),
                            "content": doc.text,
                            "metadata": doc.metadata,
                            "url": doc.metadata.get("url", ""),
                            "raw_url": doc.metadata.get("raw_url", ""),
                        }

                    # Nothing came back: report the loader's failure entry if any
                    error_msg = f"Failed to retrieve file: {failed[0] if failed else 'File not found or access denied'}"
                    return {
                        "success": False,
                        "repository": repo_name,
                        "branch": branch,
                        "file_path": file_path,
                        "error": error_msg,
                    }

                except Exception as e:
                    return {
                        "success": False,
                        "error": f"Failed to get single file: {str(e)}",
                        "repository": repo_url,
                        "file_path": file_path,
                        "branch": branch,
                    }

            def get_multiple_files(
                repo_url: str, file_paths_str: str, branch: str = "main"
            ):
                """
                Retrieve multiple files from GitHub repository

                Args:
                    repo_url: GitHub repository URL or owner/repo format
                    file_paths_str: Comma-separated string of file paths
                    branch: Branch name (default: main)

                Returns:
                    JSON response with multiple file contents and metadata
                """
                # API callers may send None or blank strings; normalise before
                # validating so the caller gets a clear message.
                repo_url = (repo_url or "").strip()
                file_paths_str = (file_paths_str or "").strip()
                branch = (branch or "").strip() or "main"

                if not repo_url:
                    return {"success": False, "error": "Repository URL is required"}

                if not file_paths_str:
                    return {"success": False, "error": "File paths are required"}

                # Parse file paths from comma-separated string, dropping empty items
                file_paths = [
                    path.strip() for path in file_paths_str.split(",") if path.strip()
                ]

                if not file_paths:
                    return {
                        "success": False,
                        "error": "No valid file paths provided",
                    }

                try:
                    # Reduce any GitHub URL form (with or without scheme, "www.",
                    # SSH style, deep links, ".git" suffix) to "owner/repo".
                    repo_name = repo_url
                    if "github.com" in repo_name:
                        after_host = repo_name.split("github.com", 1)[1].lstrip(":/")
                        repo_name = "/".join(after_host.strip("/").split("/")[:2])
                    if repo_name.endswith(".git"):
                        repo_name = repo_name[: -len(".git")]

                    # Load multiple files
                    documents, failed = load_github_files(
                        repo_name=repo_name,
                        file_paths=file_paths,
                        branch=branch,
                        github_token=os.getenv("GITHUB_API_KEY"),
                    )

                    # Process successful documents
                    successful_files = [
                        {
                            "file_path": doc.metadata.get("file_path", ""),
                            "file_name": doc.metadata.get("file_name", ""),
                            "file_size": len(doc.text),
                            "content": doc.text,
                            "metadata": doc.metadata,
                            "url": doc.metadata.get("url", ""),
                            "raw_url": doc.metadata.get("raw_url", ""),
                        }
                        for doc in documents
                    ]

                    response = {
                        # Partial success still counts; only report failure when
                        # not a single requested file could be retrieved.
                        "success": bool(successful_files),
                        "repository": repo_name,
                        "branch": branch,
                        "requested_files": len(file_paths),
                        "successful_files": len(successful_files),
                        "failed_files": len(failed),
                        "files": successful_files,
                        "failed_file_paths": failed,
                        "total_content_size": sum(
                            entry["file_size"] for entry in successful_files
                        ),
                        "requested_file_paths": file_paths,
                    }
                    if not successful_files:
                        response["error"] = "None of the requested files could be retrieved"
                    return response

                except Exception as e:
                    return {
                        "success": False,
                        "error": f"Failed to get multiple files: {str(e)}",
                        "repository": repo_url,
                        "file_paths": file_paths_str,
                        "branch": branch,
                    }

            # Register the click handlers for the GitHub file tools. Every handler
            # writes its result into the same JSON output component, and each one
            # is given an explicit api_name for its exposed endpoint.
            for _trigger_btn, _handler_fn, _handler_inputs, _endpoint_name in (
                (
                    list_files_btn,
                    list_repository_files,
                    [api_repo_input, api_branch_input, api_extensions_input],
                    "list_repository_files",
                ),
                (
                    get_single_btn,
                    get_single_file,
                    [single_repo_input, single_file_input, single_branch_input],
                    "get_single_file",
                ),
                (
                    get_multiple_btn,
                    get_multiple_files,
                    [
                        multiple_repo_input,
                        multiple_files_input,
                        multiple_branch_input,
                    ],
                    "get_multiple_files",
                ),
            ):
                _trigger_btn.click(
                    fn=_handler_fn,
                    inputs=_handler_inputs,
                    outputs=[api_response_output],
                    api_name=_endpoint_name,
                )

        # ================================
        # Tab 5: About & MCP Configuration
        # ================================
        # Informational tab built only from display components: project overview,
        # quick-start steps, the MCP endpoint with client configuration snippets,
        # the list of exposed MCP tools, and background on the project.
        with gr.TabItem("ℹ️ About & MCP Setup"):
            # Page title followed by the one-line project tagline.
            gr.Markdown("# 📚 Doc-MCP: Documentation RAG System")
            gr.Markdown(
                "**Transform GitHub documentation repositories into accessible MCP servers for AI agents.**"
            )

            # The public MCP endpoint, named once and reused by the textbox and by
            # both client configuration snippets below.
            _mcp_endpoint_url = (
                "https://agents-mcp-hackathon-doc-mcp.hf.space/gradio_api/mcp/sse"
            )

            # Client configuration snippets, assembled line by line so the
            # endpoint is interpolated rather than repeated by hand.
            sse_config = "\n".join(
                [
                    "{",
                    '  "mcpServers": {',
                    '    "doc-mcp": {',
                    f'      "url": "{_mcp_endpoint_url}"',
                    "    }",
                    "  }",
                    "}",
                ]
            )
            stdio_config = "\n".join(
                [
                    "{",
                    '  "mcpServers": {',
                    '    "doc-mcp": {',
                    '      "command": "npx",',
                    f'      "args": ["mcp-remote", "{_mcp_endpoint_url}", "--transport", "sse-only"]',
                    "    }",
                    "  }",
                    "}",
                ]
            )

            with gr.Row():
                # Left half: what the project is and how to get started.
                with gr.Column(scale=2):
                    with gr.Accordion("🎯 What is Doc-MCP?", open=True):
                        gr.Markdown("""
                        **Doc-MCP** converts GitHub documentation into AI-queryable knowledge bases via the Model Context Protocol.
                        
                        **🔑 Key Features:**
                        - 📥 **GitHub Integration** - Automatic markdown file extraction
                        - 🧠 **AI Embeddings** - Nebius AI-powered vector search  
                        - 🔍 **Smart Search** - Semantic, keyword & hybrid modes
                        - 🤖 **MCP Server** - Direct AI agent integration
                        - ⚡ **Real-time** - Live processing progress
                        """)

                    with gr.Accordion("🚀 Quick Start", open=False):
                        gr.Markdown("""
                        **1. Ingest Documentation** → Enter GitHub repo URL → Select files → Run 2-step pipeline
                        
                        **2. Query with AI** → Select repository → Ask questions → Get answers with sources
                        
                        **3. Manage Repos** → View stats → Delete old repositories
                        
                        **4. Use MCP Tools** → Configure your AI agent → Query docs directly from IDE
                        """)

                # Right half: the endpoint URL plus copy-paste client configurations.
                with gr.Column(scale=2):
                    with gr.Accordion("🔧 MCP Server Setup", open=True):
                        gr.Markdown("### 🌐 Server URL")

                        # Read-only textbox so the URL can be copied but not edited.
                        gr.Textbox(
                            label="MCP Endpoint",
                            value=_mcp_endpoint_url,
                            info="Copy this URL for your MCP client configuration",
                            interactive=False,
                        )

                        gr.Markdown("### ⚙️ Configuration")

                        # Configuration for clients that take an SSE URL directly.
                        with gr.Accordion("For Cursor, Windsurf, Cline", open=False):
                            gr.Code(
                                label="SSE Configuration",
                                value=sse_config,
                                language="json",
                                interactive=False,
                            )

                        # Configuration for STDIO clients, which launch
                        # `npx mcp-remote` pointed at the same endpoint.
                        with gr.Accordion(
                            "For STDIO Clients (Experimental)", open=False
                        ):
                            gr.Code(
                                label="STDIO Configuration",
                                value=stdio_config,
                                language="json",
                                interactive=False,
                            )

            # MCP Tools Overview: one column per tool family, with one Markdown
            # component for the family heading and one per tool entry.
            _tool_families = (
                (
                    "**🔍 Documentation Query Tools**",
                    (
                        "• `get_available_docs_repo` - List repositories",
                        "• `make_query` - Search documentation with AI",
                    ),
                ),
                (
                    "**📁 GitHub File Tools**",
                    (
                        "• `list_repository_files` - Scan repo files",
                        "• `get_single_file` - Fetch one file",
                        "• `get_multiple_files` - Fetch multiple files",
                    ),
                ),
            )
            with gr.Row():
                with gr.Column():
                    gr.Markdown("### 🛠️ Available MCP Tools")

                    with gr.Row():
                        for _family_heading, _family_tools in _tool_families:
                            with gr.Column():
                                gr.Markdown(_family_heading)
                                for _tool_line in _family_tools:
                                    gr.Markdown(_tool_line)

            # Technology Stack & Project Info: two collapsed accordions side by
            # side. Each entry below becomes its own Markdown component, rendered
            # in the listed order.
            _info_panels = (
                (
                    "⚙️ Technology Stack",
                    (
                        "**🖥️ Frontend & API**",
                        "• **Gradio** - Web interface & API framework",
                        "• **Hugging Face Spaces** - Cloud hosting",
                        "**🤖 AI & ML**",
                        "• **Nebius AI** - LLM & embedding models",
                        "• **LlamaIndex** - RAG framework",
                        "**💾 Database & Storage**",
                        "• **MongoDB Atlas** - Vector database",
                        "• **GitHub API** - Source file access",
                        "**🔌 Integration**",
                        "• **Model Context Protocol** - AI agent standard",
                        "• **Server-Sent Events** - Real-time communication",
                    ),
                ),
                (
                    "👥 Project Information",
                    (
                        "**🏆 MCP Hackathon Project**",
                        "Created to showcase AI agent integration with documentation systems.",
                        "**💡 Inspiration**",
                        "• Making Gradio docs easily searchable",
                        "• Leveraging Hugging Face AI ecosystem",
                        "• Improving developer experience with AI assistants",
                        "**🔮 Future Plans**",
                        "• Support for PDF, HTML files",
                        "• Multi-language documentation",
                        "• Custom embedding fine-tuning",
                        "**📄 License:** MIT - Free to use and modify",
                    ),
                ),
            )
            with gr.Row():
                for _panel_title, _panel_lines in _info_panels:
                    with gr.Column():
                        with gr.Accordion(_panel_title, open=False):
                            for _panel_line in _panel_lines:
                                gr.Markdown(_panel_line)

            # Usage Examples: a two-step UI walkthrough followed by a sketch of
            # the equivalent MCP tool calls.
            # Each workflow step is (heading, code label, code text).
            _workflow_steps = (
                (
                    "**📥 Step 1: Ingest Docs**",
                    "Ingestion Process",
                    "\n".join(
                        [
                            "1. Enter: gradio-app/gradio",
                            "2. Select markdown files",
                            "3. Run ingestion pipeline",
                        ]
                    ),
                ),
                (
                    "**🤖 Step 2: Query with AI**",
                    "AI Query Example",
                    "\n".join(
                        [
                            'Query: "How to create custom components?"',
                            "Response: Detailed answer with source links",
                        ]
                    ),
                ),
            )
            # The two trailing spaces on the first "Call" line are part of the
            # displayed text and are kept deliberately.
            _mcp_usage_snippet = "\n".join(
                [
                    "# In your AI agent:",
                    '1. Call: get_available_docs_repo() -> ["gradio-app/gradio", ...]  ',
                    '2. Call: make_query("gradio-app/gradio", "default", "custom components")',
                    "3. Get: AI response + source citations",
                ]
            )
            with gr.Row():
                with gr.Column():
                    with gr.Accordion("💡 Usage Examples", open=False):
                        gr.Markdown("### Example Workflow")

                        with gr.Row():
                            for _step_heading, _step_label, _step_text in _workflow_steps:
                                with gr.Column():
                                    gr.Markdown(_step_heading)
                                    gr.Code(
                                        value=_step_text,
                                        label=_step_label,
                                        interactive=False,
                                    )

                        gr.Markdown("### MCP Tool Usage")
                        gr.Code(
                            value=_mcp_usage_snippet,
                            label="MCP Integration Example",
                            language="python",
                            interactive=False,
                        )

if __name__ == "__main__":
    # Start the app only when this file is run as a script, not on import.
    # The MCP flag is passed through to Gradio's launch() unchanged.
    _launch_options = {"mcp_server": True}
    demo.launch(**_launch_options)