import asyncio
import os
import time
import traceback
from typing import Dict, List
import gradio as gr
from dotenv import load_dotenv
from llama_index.core import Settings
from llama_index.core.text_splitter import SentenceSplitter
from rag.config import (
delete_repository_data,
embed_model,
get_available_repos,
get_repo_details,
get_repository_stats,
llm,
)
from rag.github_file_loader import fetch_markdown_files as fetch_files_with_loader
from rag.github_file_loader import fetch_repository_files, load_github_files
from rag.ingest import ingest_documents_async
from rag.query import QueryRetriever
load_dotenv()
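# Global LlamaIndex settings: the LLM and embedding model are provided by rag.config
# (Nebius AI, per the About tab), and SentenceSplitter chunks each document into
# pieces of roughly 3072 tokens before embedding.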
Settings.llm = llm
Settings.embed_model = embed_model
Settings.node_parser = SentenceSplitter(chunk_size=3072)
# Environment variable to control repository management visibility
ENABLE_REPO_MANAGEMENT = os.getenv("ENABLE_REPO_MANAGEMENT", "true").lower() == "true"
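# Example (assumed .env entry) to hide the Repository Management tab:
#   ENABLE_REPO_MANAGEMENT=false
# Any value other than "true" (case-insensitive) disables the tab.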
def get_available_repositories():
return get_available_repos()
def start_file_loading(
repo_url: str, selected_files: List[str], current_progress: Dict
):
"""Step 1: Load files from GitHub"""
print("\n๐Ÿ”„ STARTING FILE LOADING STEP")
print(f"๐Ÿ“ Repository: {repo_url}")
print(f"๐Ÿ“‹ Selected files: {selected_files}")
if not selected_files:
return {
"status": "error",
"message": "โŒ No files selected for loading",
"progress": 0,
"details": "",
"step": "file_loading",
}
total_files = len(selected_files)
start_time = time.time()
# Parse repo name from URL
if "github.com" in repo_url:
repo_name = (
repo_url.replace("https://github.com/", "")
.replace("http://github.com/", "")
.strip("/")
)
if "/" not in repo_name:
return {
"status": "error",
"message": "โŒ Invalid repository URL format",
"progress": 0,
"details": "",
"step": "file_loading",
}
else:
repo_name = repo_url.strip()
try:
batch_size = 25
all_documents = []
all_failed = []
current_progress.update(
{
"status": "loading",
"message": f"๐Ÿš€ Loading files from {repo_name}",
"progress": 0,
"total_files": total_files,
"processed_files": 0,
"phase": "File Loading",
"details": f"Processing {total_files} files in batches...",
"step": "file_loading",
}
)
for i in range(0, len(selected_files), batch_size):
batch = selected_files[i : i + batch_size]
print(f"\n๐Ÿ“ฆ PROCESSING BATCH {i // batch_size + 1}")
print(f" Files: {batch}")
# Update progress for current batch
progress_percentage = (i / total_files) * 100
current_progress.update(
{
"progress": progress_percentage,
"processed_files": i,
"current_batch": i // batch_size + 1,
"details": f"Loading batch {i // batch_size + 1}: {', '.join([f.split('/')[-1] for f in batch])}",
}
)
try:
documents, failed = load_github_files(
repo_name=repo_name,
file_paths=batch,
branch="main",
concurrent_requests=10,
github_token=os.getenv("GITHUB_API_KEY"),
)
print("โœ… Load results:")
print(f" - Documents: {len(documents)}")
print(f" - Failed: {len(failed)}")
if documents:
for j, doc in enumerate(documents):
print(f" ๐Ÿ“„ Doc {j + 1}: {doc.doc_id}")
print(f" Size: {len(doc.text)} chars")
# Ensure repo metadata is set
if "repo" not in doc.metadata:
doc.metadata["repo"] = repo_name
print(f" โœ… Added repo metadata: {repo_name}")
all_documents.extend(documents)
all_failed.extend(failed)
except Exception as batch_error:
print(f"โŒ Batch processing error: {batch_error}")
all_failed.extend(batch)
loading_time = time.time() - start_time
# Store loaded documents in progress state for next step
current_progress.update(
{
"status": "loaded",
"message": f"โœ… File Loading Complete! Loaded {len(all_documents)} documents",
"progress": 100,
"phase": "Files Loaded",
"details": f"Successfully loaded {len(all_documents)} documents in {loading_time:.1f}s",
"step": "file_loading_complete",
"loaded_documents": all_documents, # Store documents for next step
"failed_files": all_failed,
"loading_time": loading_time,
"repo_name": repo_name,
}
)
return current_progress
except Exception as e:
total_time = time.time() - start_time
error_msg = f"โŒ File loading error after {total_time:.1f}s: {str(e)}"
print(error_msg)
current_progress.update(
{
"status": "error",
"message": error_msg,
"progress": 0,
"phase": "Failed",
"details": str(e),
"error": str(e),
"step": "file_loading",
}
)
return current_progress
def start_vector_ingestion(current_progress: Dict):
"""Step 2: Ingest loaded documents into vector store"""
print("\n๐Ÿ”„ STARTING VECTOR INGESTION STEP")
# Check if we have loaded documents from previous step
if current_progress.get("step") != "file_loading_complete":
return {
"status": "error",
"message": "โŒ No loaded documents found. Please load files first.",
"progress": 0,
"details": "",
"step": "vector_ingestion",
}
all_documents = current_progress.get("loaded_documents", [])
repo_name = current_progress.get("repo_name", "")
if not all_documents:
return {
"status": "error",
"message": "โŒ No documents available for vector ingestion",
"progress": 0,
"details": "",
"step": "vector_ingestion",
}
vector_start_time = time.time()
# Update state for vector store phase
current_progress.update(
{
"status": "vectorizing",
"message": "๐Ÿ”„ Generating embeddings and storing in vector database",
"progress": 0,
"phase": "Vector Store Ingestion",
"details": f"Processing {len(all_documents)} documents for embedding...",
"step": "vector_ingestion",
}
)
try:
print("๐Ÿ”„ STARTING VECTOR STORE INGESTION")
print(f" Repository: {repo_name}")
print(f" Documents to process: {len(all_documents)}")
        # Run the async ingestion in a fresh event loop: this handler is called
        # from a Gradio worker thread, which has no running asyncio loop of its own.
loop = asyncio.new_event_loop()
asyncio.set_event_loop(loop)
try:
loop.run_until_complete(ingest_documents_async(all_documents, repo_name))
finally:
loop.close()
vector_time = time.time() - vector_start_time
loading_time = current_progress.get("loading_time", 0)
total_time = loading_time + vector_time
print(f"โœ… Vector ingestion completed in {vector_time:.2f} seconds")
failed_files_data = current_progress.get("failed_files", [])
if isinstance(failed_files_data, list):
failed_files_count = len(failed_files_data)
else:
failed_files_count = (
failed_files_data if isinstance(failed_files_data, int) else 0
)
# Update final success state with repository update flag
current_progress.update(
{
"status": "complete",
"message": "โœ… Complete Ingestion Pipeline Finished!",
"progress": 100,
"phase": "Complete",
"details": f"Successfully processed {len(all_documents)} documents for {repo_name}",
"step": "complete",
"total_time": total_time,
"documents_processed": len(all_documents),
"failed_files_count": failed_files_count, # Use count instead of trying len()
"failed_files": failed_files_data, # Keep original data
"vector_time": vector_time,
"loading_time": loading_time,
"repo_name": repo_name,
"repository_updated": True, # Flag to trigger repo list refresh
}
)
return current_progress
except Exception as ingest_error:
vector_time = time.time() - vector_start_time
print(f"โŒ Vector ingestion failed after {vector_time:.2f} seconds")
print(f"โŒ Error: {ingest_error}")
# Get failed files data safely
failed_files_data = current_progress.get("failed_files", [])
if isinstance(failed_files_data, list):
failed_files_count = len(failed_files_data)
else:
failed_files_count = (
failed_files_data if isinstance(failed_files_data, int) else 0
)
current_progress.update(
{
"status": "error",
"message": "โŒ Vector Store Ingestion Failed",
"progress": 0,
"phase": "Failed",
"details": f"Error: {str(ingest_error)}",
"error": str(ingest_error),
"step": "vector_ingestion",
"failed_files_count": failed_files_count,
"failed_files": failed_files_data,
}
)
return current_progress
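# Minimal sketch of chaining the two step functions above outside the UI; the repo and
# file paths here are only illustrative (the Gradio app itself drives the generator
# variant defined below):
#
#   progress = start_file_loading(
#       "gradio-app/gradio", ["README.md", "guides/quickstart.md"], {}
#   )
#   if progress.get("step") == "file_loading_complete":
#       progress = start_vector_ingestion(progress)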
def start_file_loading_generator(
repo_url: str, selected_files: List[str], current_progress: Dict
):
"""Step 1: Load files from GitHub with yield-based real-time updates"""
print("\n๐Ÿ”„ STARTING FILE LOADING STEP")
print(f"๐Ÿ“ Repository: {repo_url}")
print(f"๐Ÿ“‹ Selected files: {len(selected_files)} files")
if not selected_files:
error_progress = {
"status": "error",
"message": "โŒ No files selected for loading",
"progress": 0,
"details": "Please select at least one file to proceed.",
"step": "file_loading",
}
yield error_progress
return error_progress
total_files = len(selected_files)
start_time = time.time()
# Parse repo name from URL
if "github.com" in repo_url:
repo_name = (
repo_url.replace("https://github.com/", "")
.replace("http://github.com/", "")
.strip("/")
)
if "/" not in repo_name:
error_progress = {
"status": "error",
"message": "โŒ Invalid repository URL format",
"progress": 0,
"details": "Expected format: owner/repo or https://github.com/owner/repo",
"step": "file_loading",
}
yield error_progress
return error_progress
else:
repo_name = repo_url.strip()
try:
batch_size = 10
all_documents = []
all_failed = []
# Initial progress update
initial_progress = {
"status": "loading",
"message": f"๐Ÿš€ Starting file loading from {repo_name}",
"progress": 0,
"total_files": total_files,
"processed_files": 0,
"successful_files": 0,
"failed_files": 0,
"phase": "File Loading",
"details": f"Preparing to load {total_files} files in batches of {batch_size}...",
"step": "file_loading",
"current_batch": 0,
"total_batches": (len(selected_files) + batch_size - 1) // batch_size,
"repo_name": repo_name,
}
yield initial_progress
time.sleep(0.5)
for i in range(0, len(selected_files), batch_size):
batch = selected_files[i : i + batch_size]
current_batch_num = i // batch_size + 1
total_batches = (len(selected_files) + batch_size - 1) // batch_size
# Update progress at batch start
batch_start_progress = {
"status": "loading",
"message": f"๐Ÿ”„ Loading batch {current_batch_num}/{total_batches}",
"progress": (i / total_files) * 90,
"processed_files": i,
"successful_files": len(all_documents),
"failed_files": len(all_failed),
"current_batch": current_batch_num,
"total_batches": total_batches,
"phase": "File Loading",
"details": f"Processing batch {current_batch_num}: {', '.join([f.split('/')[-1] for f in batch[:3]])}{'...' if len(batch) > 3 else ''}",
"step": "file_loading",
"repo_name": repo_name,
}
yield batch_start_progress
try:
print(f"\n๐Ÿ“ฆ PROCESSING BATCH {current_batch_num}/{total_batches}")
print(f" Files: {[f.split('/')[-1] for f in batch]}")
documents, failed = load_github_files(
repo_name=repo_name,
file_paths=batch,
branch="main",
concurrent_requests=10,
github_token=os.getenv("GITHUB_API_KEY"),
)
print("โœ… Load results:")
print(f" - Documents: {len(documents)}")
print(f" - Failed: {len(failed)}")
# Process documents
for j, doc in enumerate(documents):
print(f" ๐Ÿ“„ Doc {j + 1}: {doc.doc_id}")
print(f" Size: {len(doc.text)} chars")
if "repo" not in doc.metadata:
doc.metadata["repo"] = repo_name
print(f" โœ… Added repo metadata: {repo_name}")
all_documents.extend(documents)
all_failed.extend(failed)
# Update progress after batch completion
batch_complete_progress = {
"status": "loading",
"message": f"โœ… Completed batch {current_batch_num}/{total_batches}",
"progress": ((i + len(batch)) / total_files) * 90,
"processed_files": i + len(batch),
"successful_files": len(all_documents),
"failed_files": len(all_failed),
"current_batch": current_batch_num,
"total_batches": total_batches,
"phase": "File Loading",
"details": f"โœ… Batch {current_batch_num} complete: {len(documents)} loaded, {len(failed)} failed. Total progress: {len(all_documents)} documents loaded.",
"step": "file_loading",
"repo_name": repo_name,
}
yield batch_complete_progress
time.sleep(0.3)
except Exception as batch_error:
print(f"โŒ Batch processing error: {batch_error}")
all_failed.extend(batch)
error_progress = {
"status": "loading",
"message": f"โš ๏ธ Error in batch {current_batch_num}",
"progress": ((i + len(batch)) / total_files) * 90,
"processed_files": i + len(batch),
"successful_files": len(all_documents),
"failed_files": len(all_failed),
"current_batch": current_batch_num,
"phase": "File Loading",
"details": f"โŒ Batch {current_batch_num} error: {str(batch_error)[:100]}... Continuing with next batch.",
"step": "file_loading",
"repo_name": repo_name,
}
yield error_progress
loading_time = time.time() - start_time
# Final completion update
        completion_progress = {
            "status": "loaded",
            "message": f"✅ File Loading Complete! Loaded {len(all_documents)} documents",
            "progress": 100,
            "phase": "Files Loaded Successfully",
            "details": f"🎯 Final Results:\n✅ Successfully loaded: {len(all_documents)} documents\n❌ Failed files: {len(all_failed)}\n⏱️ Total time: {loading_time:.1f}s\n📊 Success rate: {(len(all_documents) / max(len(all_documents) + len(all_failed), 1) * 100):.1f}%",
"step": "file_loading_complete",
"loaded_documents": all_documents,
"failed_files": all_failed,
"loading_time": loading_time,
"repo_name": repo_name,
"total_files": total_files,
"processed_files": total_files,
"successful_files": len(all_documents),
}
yield completion_progress
return completion_progress
except Exception as e:
total_time = time.time() - start_time
error_msg = f"โŒ File loading error after {total_time:.1f}s: {str(e)}"
print(error_msg)
error_progress = {
"status": "error",
"message": error_msg,
"progress": 0,
"phase": "Loading Failed",
"details": f"Critical error during file loading:\n{str(e)}",
"error": str(e),
"step": "file_loading",
}
yield error_progress
return error_progress
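# Shape of the progress-state dict shared by the steps above and the UI helpers below
# (keys as used in this file; values are illustrative):
#
#   {
#       "status": "loading" | "loaded" | "vectorizing" | "complete" | "error",
#       "progress": 0-100,
#       "step": "file_loading" | "file_loading_complete" | "vector_ingestion" | "complete",
#       "loaded_documents": [...],  # set at "file_loading_complete", consumed by Step 2
#       "repo_name": "owner/repo",
#       ...
#   }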
# Progress display component
def format_progress_display(progress_state: Dict) -> str:
"""Format progress state into readable display with enhanced details"""
if not progress_state:
        return "🚀 Ready to start ingestion...\n\n📋 **Two-Step Process:**\n1️⃣ Load files from GitHub repository\n2️⃣ Generate embeddings and store in vector database"
status = progress_state.get("status", "unknown")
message = progress_state.get("message", "")
progress = progress_state.get("progress", 0)
phase = progress_state.get("phase", "")
details = progress_state.get("details", "")
    # Text progress bar: 40 characters wide (one block per 2.5% of progress)
    filled = int(progress / 2.5)
    progress_bar = "█" * filled + "░" * (40 - filled)
# Status emoji mapping
    status_emoji = {
        "loading": "⏳",
        "loaded": "✅",
        "vectorizing": "🧠",
        "complete": "🎉",
        "error": "❌",
    }
    emoji = status_emoji.get(status, "🔄")
output = f"{emoji} **{message}**\n\n"
# Phase and progress section
    output += f"📊 **Current Phase:** {phase}\n"
    output += f"📈 **Progress:** {progress:.1f}%\n"
output += f"[{progress_bar}] {progress:.1f}%\n\n"
# Step-specific details for file loading
if progress_state.get("step") == "file_loading":
processed = progress_state.get("processed_files", 0)
total = progress_state.get("total_files", 0)
successful = progress_state.get("successful_files", 0)
failed = progress_state.get("failed_files", 0)
        if total > 0:
            output += "📁 **File Processing Status:**\n"
            output += f" • Total files: {total}\n"
            output += f" • Processed: {processed}/{total}\n"
            output += f" • ✅ Successful: {successful}\n"
            output += f" • ❌ Failed: {failed}\n"
            if "current_batch" in progress_state and "total_batches" in progress_state:
                output += f" • 📦 Current batch: {progress_state['current_batch']}/{progress_state['total_batches']}\n"
output += "\n"
# Step-specific details for vector ingestion
elif progress_state.get("step") == "vector_ingestion":
docs_count = progress_state.get("documents_count", 0)
repo_name = progress_state.get("repo_name", "Unknown")
        if docs_count > 0:
            output += "🧠 **Vector Processing Status:**\n"
            output += f" • Repository: {repo_name}\n"
            output += f" • Documents: {docs_count:,}\n"
            output += f" • Stage: {phase}\n\n"
    # Detailed information
    output += f"📝 **Details:**\n{details}\n"
# Final summary for completion
if status == "complete":
total_time = progress_state.get("total_time", 0)
docs_processed = progress_state.get("documents_processed", 0)
failed_files = progress_state.get("failed_files", 0)
vector_time = progress_state.get("vector_time", 0)
loading_time = progress_state.get("loading_time", 0)
repo_name = progress_state.get("repo_name", "Unknown")
        output += "\n🎊 **INGESTION COMPLETED SUCCESSFULLY!**\n"
        output += "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\n"
        output += f"🎯 **Repository:** {repo_name}\n"
        output += f"📄 **Documents processed:** {docs_processed:,}\n"
        output += f"❌ **Failed files:** {len(failed_files) if isinstance(failed_files, list) else failed_files}\n"
        output += f"⏱️ **Total time:** {total_time:.1f} seconds\n"
        output += f" ├─ File loading: {loading_time:.1f}s\n"
        output += f" └─ Vector processing: {vector_time:.1f}s\n"
        rate = docs_processed / total_time if total_time > 0 else 0.0
        output += f"📊 **Processing rate:** {rate:.1f} docs/second\n\n"
        output += "🚀 **Next Step:** Go to the 'Query Interface' tab to start asking questions!"
elif status == "error":
error = progress_state.get("error", "Unknown error")
        output += "\n💥 **ERROR OCCURRED**\n"
        output += "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\n"
        output += (
            f"❌ **Error Details:** {error[:300]}{'...' if len(error) > 300 else ''}\n"
        )
        output += "\n🔧 **Troubleshooting Tips:**\n"
        output += " • Check your GitHub token permissions\n"
        output += " • Verify repository URL format\n"
        output += " • Ensure selected files exist\n"
        output += " • Check network connectivity\n"
return output
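# Illustrative rendering of the 40-character progress bar at 50%:
#   filled = int(50 / 2.5)  # 20 blocks
#   "[████████████████████░░░░░░░░░░░░░░░░░░░░] 50.0%"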
# Create the main Gradio interface
with gr.Blocks(title="Doc-MCP") as demo:
    gr.Markdown("# 📚 Doc-MCP: Documentation RAG System")
gr.Markdown(
"Transform GitHub documentation repositories into accessible MCP (Model Context Protocol) servers for AI agents. Upload documentation, generate vector embeddings, and query with intelligent context retrieval."
)
# State variables
files_state = gr.State([])
progress_state = gr.State({})
with gr.Tabs():
with gr.TabItem("๐Ÿ“ฅ Documentation Ingestion"):
gr.Markdown("### ๐Ÿš€ Two-Step Documentation Processing Pipeline")
gr.Markdown(
"**Step 1:** Fetch markdown files from GitHub repository โ†’ **Step 2:** Generate vector embeddings and store in MongoDB Atlas"
)
with gr.Row():
with gr.Column(scale=2):
repo_input = gr.Textbox(
label="๐Ÿ“‚ GitHub Repository URL",
placeholder="Enter: owner/repo or https://github.com/owner/repo (e.g., gradio-app/gradio)",
value="",
info="Enter any GitHub repository containing markdown documentation",
)
load_btn = gr.Button(
"๐Ÿ” Discover Documentation Files", variant="secondary"
)
with gr.Column(scale=1):
status_output = gr.Textbox(
label="Repository Discovery Status",
interactive=False,
lines=4,
placeholder="Repository scanning results will appear here...",
)
with gr.Row():
select_all_btn = gr.Button(
"๐Ÿ“‹ Select All Documents", variant="secondary"
)
clear_all_btn = gr.Button("๐Ÿ—‘๏ธ Clear Selection", variant="secondary")
# File selection
with gr.Accordion(label="Available Documentation Files"):
file_selector = gr.CheckboxGroup(
choices=[],
label="Select Markdown Files for RAG Processing",
visible=False,
)
# Two-step ingestion controls
gr.Markdown("### ๐Ÿ”„ RAG Pipeline Execution")
gr.Markdown(
"Process your documentation through our advanced RAG pipeline using Nebius AI embeddings and MongoDB Atlas vector storage."
)
with gr.Row():
with gr.Column():
step1_btn = gr.Button(
"๐Ÿ“ฅ Step 1: Load Files from GitHub",
variant="primary",
size="lg",
interactive=False,
)
with gr.Column():
step2_btn = gr.Button(
"๐Ÿ”„ Step 2: Start Ingestion",
variant="primary",
size="lg",
interactive=False,
)
with gr.Row():
refresh_btn = gr.Button("๐Ÿ”„ Refresh Progress", variant="secondary")
reset_btn = gr.Button("๐Ÿ—‘๏ธ Reset Progress", variant="secondary")
# Progress display
            progress_display = gr.Textbox(
                label="📊 Real-time Ingestion Progress",
                interactive=False,
                lines=25,
                value="🚀 Ready to start two-step ingestion process...\n\n📋 Steps:\n1️⃣ Load files from GitHub repository\n2️⃣ Generate embeddings and store in vector database",
                max_lines=30,
            )
# Event handlers
def load_files_handler(repo_url: str):
if not repo_url.strip():
return (
gr.CheckboxGroup(choices=[], visible=False),
"Please enter a repository URL",
[],
gr.Button(interactive=False),
gr.Button(interactive=False),
)
files, message = fetch_files_with_loader(repo_url)
if files:
return (
gr.CheckboxGroup(
choices=files,
value=[],
label=f"Select Files from {repo_url} ({len(files)} files)",
visible=True,
),
message,
files,
gr.Button(interactive=True), # Enable step 1 button
gr.Button(interactive=False), # Keep step 2 disabled
)
else:
return (
gr.CheckboxGroup(choices=[], visible=False),
message,
[],
gr.Button(interactive=False),
gr.Button(interactive=False),
)
def start_step1_generator(
repo_url: str, selected_files: List[str], current_progress: Dict
):
"""Start Step 1 with generator-based real-time progress updates"""
for progress_update in start_file_loading_generator(
repo_url, selected_files, current_progress.copy()
):
progress_text = format_progress_display(progress_update)
step2_enabled = (
progress_update.get("step") == "file_loading_complete"
)
yield (
progress_update,
progress_text,
gr.Button(interactive=step2_enabled),
)
def start_step2(current_progress: Dict):
"""Start Step 2: Vector Ingestion"""
new_progress = start_vector_ingestion(current_progress.copy())
progress_text = format_progress_display(new_progress)
return new_progress, progress_text
def refresh_progress(current_progress: Dict):
"""Refresh the progress display"""
progress_text = format_progress_display(current_progress)
return progress_text
def reset_progress():
"""Reset all progress"""
return (
{},
"Ready to start two-step ingestion process...",
gr.Button(interactive=False),
)
def select_all_handler(available_files):
if available_files:
return gr.CheckboxGroup(value=available_files)
return gr.CheckboxGroup(value=[])
def clear_all_handler():
return gr.CheckboxGroup(value=[])
# Wire up events
load_btn.click(
fn=load_files_handler,
inputs=[repo_input],
outputs=[
file_selector,
status_output,
files_state,
step1_btn,
step2_btn,
],
show_api=False,
)
select_all_btn.click(
fn=select_all_handler,
inputs=[files_state],
outputs=[file_selector],
show_api=False,
)
clear_all_btn.click(
fn=clear_all_handler, outputs=[file_selector], show_api=False
)
step1_btn.click(
fn=start_step1_generator,
inputs=[repo_input, file_selector, progress_state],
outputs=[progress_state, progress_display, step2_btn],
show_api=False,
)
step2_btn.click(
fn=start_step2,
inputs=[progress_state],
outputs=[progress_state, progress_display],
show_api=False,
)
refresh_btn.click(
fn=refresh_progress,
inputs=[progress_state],
outputs=[progress_display],
show_api=False,
)
reset_btn.click(
fn=reset_progress,
outputs=[progress_state, progress_display, step2_btn],
show_api=False,
)
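            # Note: show_api=False keeps these UI-only handlers out of the public Gradio API,
            # so they are not exposed as MCP tools; only handlers registered with an api_name
            # (see the Query and GitHub File Search tabs) become tools.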
# ================================
# Tab 2: Query Interface
# ================================
with gr.TabItem("๐Ÿค– AI Documentation Assistant"):
gr.Markdown("### ๐Ÿ’ฌ Intelligent Documentation Q&A")
gr.Markdown(
"Query your processed documentation using advanced semantic search. Get contextual answers with source citations powered by Nebius LLM and vector similarity search."
)
with gr.Row():
with gr.Column(scale=2):
# Repository selection - Dropdown that becomes textbox when selected
with gr.Row():
repo_dropdown = gr.Dropdown(
choices=get_available_repositories()
or ["No repositories available"],
label="๐Ÿ“š Select Documentation Repository",
value=None,
interactive=True,
allow_custom_value=True,
info="Choose from available repositories",
)
# Hidden textbox that will become visible when repo is selected
selected_repo_textbox = gr.Textbox(
label="๐ŸŽฏ Selected Repository",
value="",
interactive=False,
visible=False,
info="Currently selected repository for querying",
)
refresh_repos_btn = gr.Button(
"๐Ÿ”„ Refresh Repository List", variant="secondary", size="sm"
)
# Query mode selection
query_mode = gr.Radio(
choices=["default", "text_search", "hybrid"],
label="๐Ÿ” Search Strategy",
value="default",
info="โ€ข default: Semantic similarity (AI understanding)\nโ€ข text_search: Keyword matching\nโ€ข hybrid: Combined approach for best results",
)
# Query input
query_input = gr.Textbox(
label="๐Ÿ’ญ Ask About Your Documentation",
placeholder="How do I implement a custom component? What are the available API endpoints? How to configure the system?",
lines=3,
info="Ask natural language questions about your documentation",
)
query_btn = gr.Button(
"๐Ÿš€ Search Documentation", variant="primary", size="lg"
)
# Response display as text area
response_output = gr.Textbox(
label="๐Ÿค– AI Assistant Response",
value="Your AI-powered documentation response will appear here with contextual information and source citations...",
lines=10,
interactive=False,
info="Generated using Nebius LLM with retrieved documentation context",
)
with gr.Column(scale=2):
gr.Markdown("### ๐Ÿ“– Source References")
gr.Markdown(
"View the exact documentation sources used to generate the response, with relevance scores and GitHub links."
)
# Source nodes display as JSON
sources_output = gr.JSON(
label="๐Ÿ“Ž Source Citations & Metadata",
value={
"message": "Source documentation excerpts with relevance scores will appear here after your query...",
"info": "Each source includes file path, relevance score, and content snippet",
},
)
# Event handlers
def handle_repo_selection(selected_repo):
"""Handle repository selection from dropdown"""
if not selected_repo or selected_repo in [
"No repositories available",
"",
]:
return (
gr.Dropdown(visible=True), # Keep dropdown visible
gr.Textbox(visible=False, value=""), # Hide textbox
gr.Button(interactive=False), # Disable query button
)
else:
return (
gr.Dropdown(visible=False), # Hide dropdown
gr.Textbox(
visible=True, value=selected_repo
), # Show textbox with selected repo
gr.Button(interactive=True), # Enable query button
)
def reset_repo_selection():
"""Reset to show dropdown again"""
try:
repos = get_available_repositories() or [
"No repositories available"
]
return (
gr.Dropdown(
choices=repos, value=None, visible=True
), # Show dropdown with refreshed choices
gr.Textbox(visible=False, value=""), # Hide textbox
gr.Button(interactive=False), # Disable query button
)
except Exception as e:
print(f"Error refreshing repository list: {e}")
return (
gr.Dropdown(
choices=["Error loading repositories"],
value=None,
visible=True,
),
gr.Textbox(visible=False, value=""),
gr.Button(interactive=False),
)
            def get_available_docs_repo():
                """
                List the repositories whose documentation has already been ingested.
                Call this first to see which repository docs are available to chat with.

                Returns:
                    Updated dropdown populated with the available repositories
                """
try:
repos = get_available_repositories()
if not repos:
repos = [
"No repositories available - Please ingest documentation first"
]
return gr.Dropdown(choices=repos, value=None)
except Exception as e:
print(f"Error refreshing repository list: {e}")
return gr.Dropdown(
choices=["Error loading repositories"], value=None
)
# Simple query handler
def handle_query(repo: str, mode: str, query: str):
"""
Handle query request - returns raw data from retriever
Args:
repo: Selected repository from textbox
mode: Query mode (default, text_search, hybrid)
query: User's query
Returns:
Raw result dict from QueryRetriever.make_query()
"""
if not query.strip():
return {"error": "Please enter a query."}
if not repo or repo in [
"No repositories available",
"Error loading repositories",
"",
]:
return {"error": "Please select a valid repository."}
try:
# Create query retriever for the selected repo
retriever = QueryRetriever(repo)
# Make the query and return raw result
result = retriever.make_query(query, mode)
return result
except Exception as e:
print(f"Query error: {e}")
traceback.print_exc()
return {"error": f"Query failed: {str(e)}"}
            def make_query(repo: str, mode: str, query: str):
                """
                Answer a documentation question for the selected repository using the chosen retrieval mode.

                Supports Retrieval-Augmented Generation (RAG): the most relevant context chunks are
                retrieved from the indexed documentation and used to generate the response.

                Args:
                    repo: Selected repository from the textbox input
                    mode: Query mode (default, text_search, hybrid)
                    query: User's natural-language question

                Returns:
                    Tuple of (response_text, source_nodes_json)
                """
# Get raw result
result = handle_query(repo, mode, query)
# Extract response text
if "error" in result:
response_text = f"Error: {result['error']}"
source_nodes = {"error": result["error"]}
else:
response_text = result.get("response", "No response available")
source_nodes = result.get("source_nodes", [])
return response_text, source_nodes
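            # handle_query/make_query assume QueryRetriever.make_query (rag.query) returns a
            # dict shaped like {"response": "<generated answer>", "source_nodes": [...]};
            # a dict carrying an "error" key is surfaced to the user instead.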
# Wire up events
# Handle repository selection from dropdown
repo_dropdown.change(
fn=handle_repo_selection,
inputs=[repo_dropdown],
outputs=[repo_dropdown, selected_repo_textbox, query_btn],
show_api=False,
)
# Handle refresh button - resets to dropdown view
refresh_repos_btn.click(
fn=reset_repo_selection,
outputs=[repo_dropdown, selected_repo_textbox, query_btn],
show_api=False,
)
# Also provide API endpoint for listing repositories
refresh_repos_btn.click(
fn=get_available_docs_repo,
outputs=[repo_dropdown],
api_name="list_available_docs",
)
# Query button uses the textbox value (not dropdown)
query_btn.click(
fn=make_query,
inputs=[
selected_repo_textbox,
query_mode,
query_input,
], # Use textbox, not dropdown
outputs=[response_output, sources_output],
api_name="query_documentation",
)
# Also allow Enter key to trigger query
query_input.submit(
fn=make_query,
inputs=[
selected_repo_textbox,
query_mode,
query_input,
], # Use textbox, not dropdown
outputs=[response_output, sources_output],
show_api=False,
)
# ================================
# Tab 3: Repository Management
# ================================
with gr.TabItem("๐Ÿ—‚๏ธ Repository Management", visible=ENABLE_REPO_MANAGEMENT):
gr.Markdown(
"Manage your ingested repositories - view details and delete repositories when needed."
)
with gr.Row():
with gr.Column(scale=1):
gr.Markdown("### ๐Ÿ“Š Repository Statistics")
stats_display = gr.JSON(
label="Database Statistics",
value={"message": "Click refresh to load statistics..."},
)
refresh_stats_btn = gr.Button(
"๐Ÿ”„ Refresh Statistics", variant="secondary"
)
with gr.Column(scale=2):
gr.Markdown("### ๐Ÿ“‹ Repository Details")
repos_table = gr.Dataframe(
headers=["Repository", "Files", "Last Updated"],
datatype=["str", "number", "str"],
label="Ingested Repositories",
interactive=False,
wrap=True,
)
                    refresh_repo_details_btn = gr.Button(
                        "🔄 Refresh Repository List", variant="secondary"
                    )
gr.Markdown("### ๐Ÿ—‘๏ธ Delete Repository")
gr.Markdown(
"**โš ๏ธ Warning:** This will permanently delete all documents and metadata for the selected repository."
)
with gr.Row():
with gr.Column(scale=2):
delete_repo_dropdown = gr.Dropdown(
choices=[],
label="Select Repository to Delete",
value=None,
interactive=True,
allow_custom_value=False,
)
# Confirmation checkbox
confirm_delete = gr.Checkbox(
label="I understand this action cannot be undone", value=False
)
delete_btn = gr.Button(
"๐Ÿ—‘๏ธ Delete Repository",
variant="stop",
size="lg",
interactive=False,
)
with gr.Column(scale=1):
deletion_status = gr.Textbox(
label="Deletion Status",
value="Select a repository and confirm to enable deletion.",
interactive=False,
lines=6,
)
# Management functions
def load_repository_stats():
"""Load overall repository statistics"""
try:
stats = get_repository_stats()
return stats
except Exception as e:
return {"error": f"Failed to load statistics: {str(e)}"}
def load_repository_details():
"""Load detailed repository information as a table"""
try:
details = get_repo_details()
if not details:
return [["No repositories found", 0, "N/A"]]
# Format for dataframe
table_data = []
for repo in details:
last_updated = repo.get("last_updated", "Unknown")
if hasattr(last_updated, "strftime"):
last_updated = last_updated.strftime("%Y-%m-%d %H:%M")
elif last_updated != "Unknown":
last_updated = str(last_updated)
table_data.append(
[
repo.get("repo_name", "Unknown"),
repo.get("file_count", 0),
last_updated,
]
)
return table_data
except Exception as e:
return [["Error loading repositories", 0, str(e)]]
def update_delete_dropdown():
"""Update the dropdown with available repositories"""
try:
repos = get_available_repositories()
return gr.Dropdown(choices=repos, value=None)
except Exception as e:
print(f"Error updating delete dropdown: {e}")
return gr.Dropdown(choices=[], value=None)
def check_delete_button_state(repo_selected, confirmation_checked):
"""Enable/disable delete button based on selection and confirmation"""
if repo_selected and confirmation_checked:
return gr.Button(interactive=True)
else:
return gr.Button(interactive=False)
def delete_repository(repo_name: str, confirmed: bool):
"""Delete the selected repository"""
                if not repo_name:
                    return (
                        "❌ No repository selected.",
                        update_delete_dropdown(),
                        gr.Checkbox(value=False),
                    )
                if not confirmed:
                    return (
                        "❌ Please confirm deletion by checking the checkbox.",
                        update_delete_dropdown(),
                        gr.Checkbox(value=False),
                    )
try:
# Perform deletion
result = delete_repository_data(repo_name)
# Prepare status message
status_msg = result["message"]
if result["success"]:
status_msg += "\n\n๐Ÿ“Š Deletion Summary:"
status_msg += f"\n- Vector documents removed: {result['vector_docs_deleted']}"
status_msg += f"\n- Repository record deleted: {'Yes' if result['repo_record_deleted'] else 'No'}"
status_msg += f"\n\nโœ… Repository '{repo_name}' has been completely removed."
# Update dropdown (remove deleted repo)
updated_dropdown = update_delete_dropdown()
# Reset confirmation checkbox
reset_checkbox = gr.Checkbox(value=False)
return status_msg, updated_dropdown, reset_checkbox
except Exception as e:
                    error_msg = f"❌ Error deleting repository: {str(e)}"
                    return error_msg, update_delete_dropdown(), gr.Checkbox(value=False)
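            # delete_repository_data (rag.config) is expected to return a dict with at least
            # {"success": bool, "message": str, "vector_docs_deleted": int,
            #  "repo_record_deleted": bool}, which the summary above relies on.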
# Wire up management events
refresh_stats_btn.click(
fn=load_repository_stats, outputs=[stats_display], show_api=False
)
            refresh_repo_details_btn.click(
                fn=load_repository_details, outputs=[repos_table], show_api=False
            )
# Update delete dropdown when refreshing repos
            refresh_repo_details_btn.click(
fn=update_delete_dropdown,
outputs=[delete_repo_dropdown],
show_api=False,
)
# Enable/disable delete button based on selection and confirmation
delete_repo_dropdown.change(
fn=check_delete_button_state,
inputs=[delete_repo_dropdown, confirm_delete],
outputs=[delete_btn],
show_api=False,
)
confirm_delete.change(
fn=check_delete_button_state,
inputs=[delete_repo_dropdown, confirm_delete],
outputs=[delete_btn],
show_api=False,
)
# Delete repository
delete_btn.click(
fn=delete_repository,
inputs=[delete_repo_dropdown, confirm_delete],
outputs=[deletion_status, delete_repo_dropdown, confirm_delete],
show_api=False,
)
# Load data on tab load
demo.load(fn=load_repository_stats, outputs=[stats_display], show_api=False)
demo.load(fn=load_repository_details, outputs=[repos_table], show_api=False)
demo.load(
fn=update_delete_dropdown,
outputs=[delete_repo_dropdown],
show_api=False,
)
# ================================
# Tab 4: GitHub File Search (Hidden API)
# ================================
with gr.TabItem("๐Ÿ” GitHub File Search", visible=False):
gr.Markdown("### ๐Ÿ”ง GitHub Repository File Search API")
gr.Markdown(
"Pure API endpoints for GitHub file operations - all responses in JSON format"
)
with gr.Row():
with gr.Column():
gr.Markdown("#### ๐Ÿ“‹ List Repository Files")
# Repository input for file operations
api_repo_input = gr.Textbox(
label="Repository URL",
placeholder="owner/repo or https://github.com/owner/repo",
value="",
info="GitHub repository to scan",
)
# Branch selection
api_branch_input = gr.Textbox(
label="Branch",
value="main",
placeholder="main",
info="Branch to search (default: main)",
)
# File extensions
api_extensions_input = gr.Textbox(
label="File Extensions (comma-separated)",
value=".md,.mdx",
placeholder=".md,.mdx,.txt",
info="File extensions to include",
)
# List files button
list_files_btn = gr.Button("๐Ÿ“‹ List Files", variant="primary")
with gr.Column():
gr.Markdown("#### ๐Ÿ“„ Get Single File")
# Single file inputs
single_repo_input = gr.Textbox(
label="Repository URL",
placeholder="owner/repo or https://github.com/owner/repo",
value="",
info="GitHub repository",
)
single_file_input = gr.Textbox(
label="File Path",
placeholder="docs/README.md",
value="",
info="Path to specific file in repository",
)
single_branch_input = gr.Textbox(
label="Branch",
value="main",
placeholder="main",
info="Branch name (default: main)",
)
# Get single file button
get_single_btn = gr.Button(
"๐Ÿ“„ Get Single File", variant="secondary"
)
with gr.Row():
with gr.Column():
gr.Markdown("#### ๐Ÿ“š Get Multiple Files")
# Multiple files inputs
multiple_repo_input = gr.Textbox(
label="Repository URL",
placeholder="owner/repo or https://github.com/owner/repo",
value="",
info="GitHub repository",
)
multiple_files_input = gr.Textbox(
label="File Paths (comma-separated)",
placeholder="README.md,docs/guide.md,api/overview.md",
value="",
lines=3,
info="Comma-separated list of file paths",
)
multiple_branch_input = gr.Textbox(
label="Branch",
value="main",
placeholder="main",
info="Branch name (default: main)",
)
# Get multiple files button
get_multiple_btn = gr.Button(
"๐Ÿ“š Get Multiple Files", variant="secondary"
)
# Single JSON output for all operations
gr.Markdown("### ๐Ÿ“Š API Response")
api_response_output = gr.JSON(
label="JSON Response",
value={
"message": "API responses will appear here",
"info": "Use the buttons above to interact with GitHub repositories",
},
)
# Pure API Functions (JSON only responses)
def list_repository_files(
repo_url: str, branch: str = "main", extensions: str = ".md,.mdx"
):
"""
List all files in a GitHub repository with specified extensions
Args:
repo_url: GitHub repository URL or owner/repo format
branch: Branch name to search (default: main)
extensions: Comma-separated file extensions (default: .md,.mdx)
Returns:
JSON response with file list and metadata
"""
try:
if not repo_url.strip():
return {"success": False, "error": "Repository URL is required"}
# Parse extensions list
ext_list = [
ext.strip() for ext in extensions.split(",") if ext.strip()
]
if not ext_list:
ext_list = [".md", ".mdx"]
# Get files list
files, status_message = fetch_repository_files(
repo_url=repo_url,
file_extensions=ext_list,
github_token=os.getenv("GITHUB_API_KEY"),
branch=branch,
)
if files:
return {
"success": True,
"repository": repo_url,
"branch": branch,
"extensions": ext_list,
"total_files": len(files),
"files": files,
"status": status_message,
}
else:
return {
"success": False,
"repository": repo_url,
"branch": branch,
"extensions": ext_list,
"total_files": 0,
"files": [],
"error": status_message or "No files found",
}
except Exception as e:
return {
"success": False,
"error": f"Failed to list files: {str(e)}",
"repository": repo_url,
"branch": branch,
}
def get_single_file(repo_url: str, file_path: str, branch: str = "main"):
"""
Retrieve a single file from GitHub repository
Args:
repo_url: GitHub repository URL or owner/repo format
file_path: Path to the file in the repository
branch: Branch name (default: main)
Returns:
JSON response with file content and metadata
"""
try:
if not repo_url.strip():
return {"success": False, "error": "Repository URL is required"}
if not file_path.strip():
return {"success": False, "error": "File path is required"}
# Parse repo name
if "github.com" in repo_url:
repo_name = (
repo_url.replace("https://github.com/", "")
.replace("http://github.com/", "")
.strip("/")
)
else:
repo_name = repo_url.strip()
# Load single file
documents, failed = load_github_files(
repo_name=repo_name,
file_paths=[file_path.strip()],
branch=branch,
github_token=os.getenv("GITHUB_API_KEY"),
)
if documents and len(documents) > 0:
doc = documents[0]
return {
"success": True,
"repository": repo_name,
"branch": branch,
"file_path": file_path,
"file_name": doc.metadata.get("file_name", ""),
"file_size": len(doc.text),
"content": doc.text,
"metadata": doc.metadata,
"url": doc.metadata.get("url", ""),
"raw_url": doc.metadata.get("raw_url", ""),
}
else:
error_msg = f"Failed to retrieve file: {failed[0] if failed else 'File not found or access denied'}"
return {
"success": False,
"repository": repo_name,
"branch": branch,
"file_path": file_path,
"error": error_msg,
}
except Exception as e:
return {
"success": False,
"error": f"Failed to get single file: {str(e)}",
"repository": repo_url,
"file_path": file_path,
"branch": branch,
}
def get_multiple_files(
repo_url: str, file_paths_str: str, branch: str = "main"
):
"""
Retrieve multiple files from GitHub repository
Args:
repo_url: GitHub repository URL or owner/repo format
file_paths_str: Comma-separated string of file paths
branch: Branch name (default: main)
Returns:
JSON response with multiple file contents and metadata
"""
try:
if not repo_url.strip():
return {"success": False, "error": "Repository URL is required"}
if not file_paths_str.strip():
return {"success": False, "error": "File paths are required"}
# Parse file paths from comma-separated string
file_paths = [
path.strip()
for path in file_paths_str.split(",")
if path.strip()
]
if not file_paths:
return {
"success": False,
"error": "No valid file paths provided",
}
# Parse repo name
if "github.com" in repo_url:
repo_name = (
repo_url.replace("https://github.com/", "")
.replace("http://github.com/", "")
.strip("/")
)
else:
repo_name = repo_url.strip()
# Load multiple files
documents, failed = load_github_files(
repo_name=repo_name,
file_paths=file_paths,
branch=branch,
github_token=os.getenv("GITHUB_API_KEY"),
)
# Process successful documents
successful_files = []
for doc in documents:
file_data = {
"file_path": doc.metadata.get("file_path", ""),
"file_name": doc.metadata.get("file_name", ""),
"file_size": len(doc.text),
"content": doc.text,
"metadata": doc.metadata,
"url": doc.metadata.get("url", ""),
"raw_url": doc.metadata.get("raw_url", ""),
}
successful_files.append(file_data)
return {
"success": True,
"repository": repo_name,
"branch": branch,
"requested_files": len(file_paths),
"successful_files": len(successful_files),
"failed_files": len(failed),
"files": successful_files,
"failed_file_paths": failed,
"total_content_size": sum(len(doc.text) for doc in documents),
"requested_file_paths": file_paths,
}
except Exception as e:
return {
"success": False,
"error": f"Failed to get multiple files: {str(e)}",
"repository": repo_url,
"file_paths": file_paths_str,
"branch": branch,
}
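            # The three helpers above are registered with api_name below, so when the app is
            # launched with mcp_server=True they are also exposed as MCP tools
            # (list_repository_files, get_single_file, get_multiple_files).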
# Wire up the GitHub file search events - all output to single JSON component
list_files_btn.click(
fn=list_repository_files,
inputs=[api_repo_input, api_branch_input, api_extensions_input],
outputs=[api_response_output],
api_name="list_repository_files",
)
get_single_btn.click(
fn=get_single_file,
inputs=[single_repo_input, single_file_input, single_branch_input],
outputs=[api_response_output],
api_name="get_single_file",
)
get_multiple_btn.click(
fn=get_multiple_files,
inputs=[
multiple_repo_input,
multiple_files_input,
multiple_branch_input,
],
outputs=[api_response_output],
api_name="get_multiple_files",
)
# ================================
# Tab 5: About & MCP Configuration
# ================================
with gr.TabItem("โ„น๏ธ About & MCP Setup"):
gr.Markdown("# ๐Ÿ“š Doc-MCP: Documentation RAG System")
gr.Markdown(
"**Transform GitHub documentation repositories into accessible MCP servers for AI agents.**"
)
with gr.Row():
with gr.Column(scale=2):
# Project Overview
with gr.Accordion("๐ŸŽฏ What is Doc-MCP?", open=True):
gr.Markdown("""
**Doc-MCP** converts GitHub documentation into AI-queryable knowledge bases via the Model Context Protocol.
**๐Ÿ”‘ Key Features:**
- ๐Ÿ“ฅ **GitHub Integration** - Automatic markdown file extraction
- ๐Ÿง  **AI Embeddings** - Nebius AI-powered vector search
- ๐Ÿ” **Smart Search** - Semantic, keyword & hybrid modes
- ๐Ÿค– **MCP Server** - Direct AI agent integration
- โšก **Real-time** - Live processing progress
""")
# Quick Start Guide
with gr.Accordion("๐Ÿš€ Quick Start", open=False):
gr.Markdown("""
**1. Ingest Documentation** โ†’ Enter GitHub repo URL โ†’ Select files โ†’ Run 2-step pipeline
**2. Query with AI** โ†’ Select repository โ†’ Ask questions โ†’ Get answers with sources
**3. Manage Repos** โ†’ View stats โ†’ Delete old repositories
**4. Use MCP Tools** โ†’ Configure your AI agent โ†’ Query docs directly from IDE
""")
with gr.Column(scale=2):
# MCP Server Configuration
with gr.Accordion("๐Ÿ”ง MCP Server Setup", open=True):
gr.Markdown("### ๐ŸŒ Server URL")
# Server URL
gr.Textbox(
value="https://agents-mcp-hackathon-doc-mcp.hf.space/gradio_api/mcp/sse",
label="MCP Endpoint",
interactive=False,
info="Copy this URL for your MCP client configuration",
)
gr.Markdown("### โš™๏ธ Configuration")
# SSE Configuration
with gr.Accordion("For Cursor, Windsurf, Cline", open=False):
sse_config = """{
"mcpServers": {
"doc-mcp": {
"url": "https://agents-mcp-hackathon-doc-mcp.hf.space/gradio_api/mcp/sse"
}
}
}"""
gr.Code(
value=sse_config,
label="SSE Configuration",
language="json",
interactive=False,
)
# STDIO Configuration
with gr.Accordion(
"For STDIO Clients (Experimental)", open=False
):
stdio_config = """{
"mcpServers": {
"doc-mcp": {
"command": "npx",
"args": ["mcp-remote", "https://agents-mcp-hackathon-doc-mcp.hf.space/gradio_api/mcp/sse", "--transport", "sse-only"]
}
}
}"""
gr.Code(
value=stdio_config,
label="STDIO Configuration",
language="json",
interactive=False,
)
# MCP Tools Overview
with gr.Row():
with gr.Column():
gr.Markdown("### ๐Ÿ› ๏ธ Available MCP Tools")
with gr.Row():
with gr.Column():
gr.Markdown("**๐Ÿ” Documentation Query Tools**")
gr.Markdown(
"โ€ข `get_available_docs_repo` - List repositories"
)
gr.Markdown("โ€ข `make_query` - Search documentation with AI")
with gr.Column():
gr.Markdown("**๐Ÿ“ GitHub File Tools**")
gr.Markdown("โ€ข `list_repository_files` - Scan repo files")
gr.Markdown("โ€ข `get_single_file` - Fetch one file")
gr.Markdown("โ€ข `get_multiple_files` - Fetch multiple files")
# Technology Stack & Project Info
with gr.Row():
with gr.Column():
with gr.Accordion("โš™๏ธ Technology Stack", open=False):
gr.Markdown("**๐Ÿ–ฅ๏ธ Frontend & API**")
gr.Markdown("โ€ข **Gradio** - Web interface & API framework")
gr.Markdown("โ€ข **Hugging Face Spaces** - Cloud hosting")
gr.Markdown("**๐Ÿค– AI & ML**")
gr.Markdown("โ€ข **Nebius AI** - LLM & embedding models")
gr.Markdown("โ€ข **LlamaIndex** - RAG framework")
gr.Markdown("**๐Ÿ’พ Database & Storage**")
gr.Markdown("โ€ข **MongoDB Atlas** - Vector database")
gr.Markdown("โ€ข **GitHub API** - Source file access")
gr.Markdown("**๐Ÿ”Œ Integration**")
gr.Markdown("โ€ข **Model Context Protocol** - AI agent standard")
gr.Markdown(
"โ€ข **Server-Sent Events** - Real-time communication"
)
with gr.Column():
with gr.Accordion("๐Ÿ‘ฅ Project Information", open=False):
gr.Markdown("**๐Ÿ† MCP Hackathon Project**")
gr.Markdown(
"Created to showcase AI agent integration with documentation systems."
)
gr.Markdown("**๐Ÿ’ก Inspiration**")
gr.Markdown("โ€ข Making Gradio docs easily searchable")
gr.Markdown("โ€ข Leveraging Hugging Face AI ecosystem")
gr.Markdown(
"โ€ข Improving developer experience with AI assistants"
)
gr.Markdown("**๐Ÿ”ฎ Future Plans**")
gr.Markdown("โ€ข Support for PDF, HTML files")
gr.Markdown("โ€ข Multi-language documentation")
gr.Markdown("โ€ข Custom embedding fine-tuning")
gr.Markdown("**๐Ÿ“„ License:** MIT - Free to use and modify")
# Usage Examples
with gr.Row():
with gr.Column():
with gr.Accordion("๐Ÿ’ก Usage Examples", open=False):
gr.Markdown("### Example Workflow")
with gr.Row():
with gr.Column():
gr.Markdown("**๐Ÿ“ฅ Step 1: Ingest Docs**")
gr.Code(
value="1. Enter: gradio-app/gradio\n2. Select markdown files\n3. Run ingestion pipeline",
label="Ingestion Process",
interactive=False,
)
with gr.Column():
gr.Markdown("**๐Ÿค– Step 2: Query with AI**")
gr.Code(
value='Query: "How to create custom components?"\nResponse: Detailed answer with source links',
label="AI Query Example",
interactive=False,
)
gr.Markdown("### MCP Tool Usage")
gr.Code(
value="""# In your AI agent:
1. Call: get_available_docs_repo() -> ["gradio-app/gradio", ...]
2. Call: make_query("gradio-app/gradio", "default", "custom components")
3. Get: AI response + source citations""",
label="MCP Integration Example",
language="python",
interactive=False,
)
if __name__ == "__main__":
demo.launch(mcp_server=True)
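# Running locally: `python app.py` serves the Gradio UI and the MCP endpoint
# (demo.launch(mcp_server=True)). Environment variables are read from .env at startup;
# GITHUB_API_KEY and ENABLE_REPO_MANAGEMENT are used here, and rag.config is assumed to
# pick up its own credentials (e.g. Nebius and MongoDB Atlas settings) from the same file.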