Upload 60 files
Initial upload of project
This view is limited to 50 files because it contains too many changes.
- .gitattributes +36 -0
- .gitignore +26 -0
- .python-version +1 -0
- README.md +180 -0
- app.py +0 -0
- mcp_api_call.py +39 -0
- mcp_hub/__init__.py +20 -0
- mcp_hub/__pycache__/__init__.cpython-312.pyc +0 -0
- mcp_hub/__pycache__/cache_utils.cpython-312.pyc +0 -0
- mcp_hub/__pycache__/config.cpython-312.pyc +0 -0
- mcp_hub/__pycache__/exceptions.cpython-312.pyc +0 -0
- mcp_hub/__pycache__/health_monitoring.cpython-312.pyc +0 -0
- mcp_hub/__pycache__/logging_config.cpython-312.pyc +0 -0
- mcp_hub/__pycache__/package_utils.cpython-312.pyc +0 -0
- mcp_hub/__pycache__/performance_monitoring.cpython-312.pyc +0 -0
- mcp_hub/__pycache__/reliability_utils.cpython-312.pyc +0 -0
- mcp_hub/__pycache__/sandbox_pool.cpython-312.pyc +0 -0
- mcp_hub/__pycache__/utils.cpython-312.pyc +0 -0
- mcp_hub/advanced_config.py +272 -0
- mcp_hub/async_utils.py +95 -0
- mcp_hub/cache_utils.py +211 -0
- mcp_hub/config.py +120 -0
- mcp_hub/exceptions.py +28 -0
- mcp_hub/health_monitoring.py +261 -0
- mcp_hub/logging_config.py +51 -0
- mcp_hub/package_utils.py +192 -0
- mcp_hub/performance_monitoring.py +232 -0
- mcp_hub/reliability_utils.py +254 -0
- mcp_hub/sandbox_pool.py +701 -0
- mcp_hub/utils.py +439 -0
- pyproject.toml +28 -0
- pytest.ini +11 -0
- requirements.txt +11 -0
- tests/__init__.py +1 -0
- tests/__pycache__/__init__.cpython-312.pyc +0 -0
- tests/__pycache__/conftest.cpython-312-pytest-8.4.0.pyc +0 -0
- tests/conftest.py +142 -0
- tests/integration/__init__.py +1 -0
- tests/integration/__pycache__/__init__.cpython-312.pyc +0 -0
- tests/integration/__pycache__/test_async_sync_error_handling.cpython-312-pytest-8.4.0.pyc +0 -0
- tests/integration/__pycache__/test_end_to_end_workflow.cpython-312-pytest-8.4.0.pyc +0 -0
- tests/integration/__pycache__/test_performance_resources.cpython-312-pytest-8.4.0.pyc +0 -0
- tests/integration/__pycache__/test_ui_endpoints.cpython-312-pytest-8.4.0.pyc +0 -0
- tests/unit/__init__.py +1 -0
- tests/unit/__pycache__/__init__.cpython-312.pyc +0 -0
- tests/unit/__pycache__/test_citation_formatter_agent.cpython-312-pytest-8.4.0.pyc +0 -0
- tests/unit/__pycache__/test_code_generator_agent.cpython-312-pytest-8.4.0.pyc +0 -0
- tests/unit/__pycache__/test_code_runner_agent.cpython-312-pytest-8.4.0.pyc +0 -0
- tests/unit/__pycache__/test_llm_processor_agent.cpython-312-pytest-8.4.0.pyc +0 -0
- tests/unit/__pycache__/test_orchestrator_agent.cpython-312-pytest-8.4.0.pyc +0 -0
.gitattributes
ADDED
@@ -0,0 +1,36 @@
*.7z filter=lfs diff=lfs merge=lfs -text
*.arrow filter=lfs diff=lfs merge=lfs -text
*.bin filter=lfs diff=lfs merge=lfs -text
*.bz2 filter=lfs diff=lfs merge=lfs -text
*.ckpt filter=lfs diff=lfs merge=lfs -text
*.ftz filter=lfs diff=lfs merge=lfs -text
*.gz filter=lfs diff=lfs merge=lfs -text
*.h5 filter=lfs diff=lfs merge=lfs -text
*.joblib filter=lfs diff=lfs merge=lfs -text
*.lfs.* filter=lfs diff=lfs merge=lfs -text
*.mlmodel filter=lfs diff=lfs merge=lfs -text
*.model filter=lfs diff=lfs merge=lfs -text
*.msgpack filter=lfs diff=lfs merge=lfs -text
*.npy filter=lfs diff=lfs merge=lfs -text
*.npz filter=lfs diff=lfs merge=lfs -text
*.onnx filter=lfs diff=lfs merge=lfs -text
*.ot filter=lfs diff=lfs merge=lfs -text
*.parquet filter=lfs diff=lfs merge=lfs -text
*.pb filter=lfs diff=lfs merge=lfs -text
*.pickle filter=lfs diff=lfs merge=lfs -text
*.pkl filter=lfs diff=lfs merge=lfs -text
*.pt filter=lfs diff=lfs merge=lfs -text
*.pth filter=lfs diff=lfs merge=lfs -text
*.rar filter=lfs diff=lfs merge=lfs -text
*.safetensors filter=lfs diff=lfs merge=lfs -text
saved_model/**/* filter=lfs diff=lfs merge=lfs -text
*.tar.* filter=lfs diff=lfs merge=lfs -text
*.tar filter=lfs diff=lfs merge=lfs -text
*.tflite filter=lfs diff=lfs merge=lfs -text
*.tgz filter=lfs diff=lfs merge=lfs -text
*.wasm filter=lfs diff=lfs merge=lfs -text
*.xz filter=lfs diff=lfs merge=lfs -text
*.zip filter=lfs diff=lfs merge=lfs -text
*.zst filter=lfs diff=lfs merge=lfs -text
*tfevents* filter=lfs diff=lfs merge=lfs -text
*.png filter=lfs diff=lfs merge=lfs -text
.gitignore
ADDED
@@ -0,0 +1,26 @@
# Python-generated files
__pycache__/
*.py[oc]
build/
dist/
wheels/
*.egg-info

# Virtual environments
.venv
.cadence
.env
.idea
.mypy_cache/
archive/
cache
logs

# Test-generated files
test_cache/
.coverage
htmlcov/
.pytest_cache/
.ruff_cache
assets
static
.python-version
ADDED
@@ -0,0 +1 @@
3.12
README.md
ADDED
@@ -0,0 +1,180 @@
---
title: ShallowCodeResearch
emoji: 📉
colorFrom: blue
colorTo: pink
sdk: gradio
sdk_version: 5.33.0
app_file: app.py
pinned: false
short_description: Coding research assistant that generates code and tests it
tags:
- mcp
- multi-agent
- research
- code-generation
- ai-assistant
- gradio
- python
- web-search
- llm
- modal
python_version: "3.12"
---
---

# MCP Hub - Multi-Agent AI Research & Code Assistant

🚀 **Advanced multi-agent system for AI-powered research and code generation**

## What is MCP Hub?

MCP Hub is a sophisticated multi-agent research and code assistant built using Gradio's Model Context Protocol (MCP) server functionality. It orchestrates specialized AI agents to provide comprehensive research capabilities and generate executable Python code.

## ✨ Key Features

- 🧠 **Multi-Agent Architecture**: Specialized agents working in orchestrated workflows
- 🔍 **Intelligent Research**: Web search with automatic summarization and citation formatting
- 💻 **Code Generation**: Context-aware Python code creation with secure execution
- 🔗 **MCP Server**: Built-in MCP server for seamless agent communication
- 🎯 **Multiple LLM Support**: Compatible with Nebius, OpenAI, Anthropic, and HuggingFace
- 🛡️ **Secure Execution**: Modal sandbox environment for safe code execution
- 📊 **Performance Monitoring**: Advanced metrics collection and health monitoring

## 🚀 Quick Start

1. **Configure your environment** by setting up API keys in the Settings tab
2. **Choose your LLM provider** (Nebius recommended for best performance)
3. **Input your research query** in the Orchestrator Flow tab
4. **Watch the magic happen** as agents collaborate to research and generate code

## 🏗️ Architecture

### Core Agents

- **Question Enhancer**: Breaks down complex queries into focused sub-questions
- **Web Search Agent**: Performs targeted searches using Tavily API
- **LLM Processor**: Handles text processing, summarization, and analysis
- **Citation Formatter**: Manages academic citation formatting (APA style)
- **Code Generator**: Creates contextually-aware Python code
- **Code Runner**: Executes code in secure Modal sandboxes
- **Orchestrator**: Coordinates the complete workflow

### Workflow Example

```
User Query: "Create Python code to analyze Twitter sentiment"
↓
Question Enhancement: Split into focused sub-questions
↓
Web Research: Search for Twitter APIs, sentiment libraries, examples
↓
Context Integration: Combine research into comprehensive context
↓
Code Generation: Create executable Python script
↓
Secure Execution: Run code in Modal sandbox
↓
Results: Code + output + research summary + citations
```

## 🛠️ Setup Requirements

### Required API Keys

- **LLM Provider** (choose one):
  - Nebius API (recommended)
  - OpenAI API
  - Anthropic API
  - HuggingFace Inference API
- **Tavily API** (for web search)
- **Modal Account** (for code execution)

### Environment Configuration

Set these environment variables or configure in the app:

```bash
LLM_PROVIDER=nebius  # Your chosen provider
NEBIUS_API_KEY=your_key_here
TAVILY_API_KEY=your_key_here
# Modal setup handled automatically
```

## 🎯 Use Cases

### Research & Development
- **Academic Research**: Automated literature review and citation management
- **Technical Documentation**: Generate comprehensive guides with current information
- **Market Analysis**: Research trends and generate analytical reports

### Code Generation
- **Prototype Development**: Rapidly create functional code based on requirements
- **API Integration**: Generate code for working with various APIs and services
- **Data Analysis**: Create scripts for data processing and visualization

### Learning & Education
- **Code Examples**: Generate educational code samples with explanations
- **Concept Exploration**: Research and understand complex programming concepts
- **Best Practices**: Learn current industry standards and methodologies

## 🔧 Advanced Features

### Performance Monitoring
- Real-time metrics collection
- Response time tracking
- Success rate monitoring
- Resource usage analytics

### Intelligent Caching
- Reduces redundant API calls
- Improves response times
- Configurable TTL settings

### Fault Tolerance
- Circuit breaker protection
- Rate limiting management
- Graceful error handling
- Automatic retry mechanisms

### Sandbox Pool Management
- Pre-warmed execution environments
- Optimized performance
- Resource pooling
- Automatic scaling

## 📱 Interface Tabs

1. **Orchestrator Flow**: Complete end-to-end workflow
2. **Individual Agents**: Access each agent separately for specific tasks
3. **Advanced Features**: System monitoring and performance analytics

## 🤝 MCP Integration

This application demonstrates advanced MCP (Model Context Protocol) implementation:

- **Server Architecture**: Full MCP server with schema generation
- **Function Registry**: Proper MCP function definitions with typing
- **Multi-Agent Communication**: Structured data flow between agents
- **Error Handling**: Robust error management across agent interactions

## 📊 Performance

- **Response Times**: Optimized for sub-second agent responses
- **Scalability**: Handles concurrent requests efficiently
- **Reliability**: Built-in fault tolerance and monitoring
- **Resource Management**: Intelligent caching and pooling

## 🔍 Technical Details

- **Python**: 3.12+ required
- **Framework**: Gradio with MCP server capabilities
- **Execution**: Modal for secure sandboxed code execution
- **Search**: Tavily API for real-time web research
- **Monitoring**: Comprehensive performance and health tracking

---

**Ready to experience the future of AI-assisted research and development?**

Start by configuring your API keys and dive into the world of multi-agent AI collaboration! 🚀
app.py
ADDED
The diff for this file is too large to render.
mcp_api_call.py
ADDED
@@ -0,0 +1,39 @@
from gradio_client import Client

def print_human_readable_result(result):
    # Print main request and status
    if isinstance(result, tuple):
        result = next((item for item in result if isinstance(item, dict)), result[0])
    print("Status:", result.get('status', 'N/A'))
    print("User Request:", result.get('user_request', 'N/A'))
    print("\nSub-Questions:")
    for i, sub_q in enumerate(result.get('sub_questions', []), 1):
        print(f" {i}. {sub_q}")

    print("\nSearch Summaries:")
    for i, summary in enumerate(result.get('search_summaries', []), 1):
        print(f" {i}. {summary}")

    print("\nSearch Results:")
    for i, res in enumerate(result.get('search_results', []), 1):
        print(f" {i}. {res['title']}\n URL: {res['url']}\n Content: {res['content'][:100]}{'...' if len(res['content']) > 100 else ''}\n Score: {res['score']:.3f}")

    print("\nGenerated Code:\n" + result.get('code_string', 'N/A'))

    print("\nExecution Output:\n" + result.get('execution_output', 'N/A'))

    print("\nCitations:")
    for i, cit in enumerate(result.get('citations', []), 1):
        print(f" {i}. {cit}")

    print("\nFinal Summary:\n" + result.get('final_summary', 'N/A'))

    print("\nOrchestration Message:", result.get('message', 'N/A'))

client = Client("http://127.0.0.1:7860/")
result = client.predict(
    user_request="How do I calculate the sum of an array in Python?",
    api_name="/process_orchestrator_request"
)
print_human_readable_result(result)
mcp_hub/__init__.py
ADDED
@@ -0,0 +1,20 @@
"""MCP Hub - Multi-Agent Communication Protocol Hub for Research and Code Generation."""

__version__ = "1.0.0"
__author__ = "Your Name"
__description__ = "Advanced MCP Hub with intelligent agent orchestration"

# Core imports that should be available at package level
try:
    from .config import api_config, model_config, app_config
    from .exceptions import APIError, ValidationError, CodeGenerationError, CodeExecutionError
    from .logging_config import logger

    __all__ = [
        "api_config", "model_config", "app_config",
        "APIError", "ValidationError", "CodeGenerationError", "CodeExecutionError",
        "logger"
    ]
except ImportError:
    # Graceful degradation for missing dependencies
    __all__ = []
mcp_hub/__pycache__/__init__.cpython-312.pyc
ADDED
Binary file (818 Bytes).
mcp_hub/__pycache__/cache_utils.cpython-312.pyc
ADDED
Binary file (11 kB).
mcp_hub/__pycache__/config.cpython-312.pyc
ADDED
Binary file (5.42 kB).
mcp_hub/__pycache__/exceptions.cpython-312.pyc
ADDED
Binary file (1.83 kB).
mcp_hub/__pycache__/health_monitoring.cpython-312.pyc
ADDED
Binary file (11.2 kB).
mcp_hub/__pycache__/logging_config.cpython-312.pyc
ADDED
Binary file (2.1 kB).
mcp_hub/__pycache__/package_utils.cpython-312.pyc
ADDED
Binary file (6.79 kB).
mcp_hub/__pycache__/performance_monitoring.cpython-312.pyc
ADDED
Binary file (13.3 kB).
mcp_hub/__pycache__/reliability_utils.cpython-312.pyc
ADDED
Binary file (11.6 kB).
mcp_hub/__pycache__/sandbox_pool.cpython-312.pyc
ADDED
Binary file (37.9 kB).
mcp_hub/__pycache__/utils.cpython-312.pyc
ADDED
Binary file (19.1 kB).
mcp_hub/advanced_config.py
ADDED
@@ -0,0 +1,272 @@
"""Advanced configuration management with validation and environment-specific settings."""

import os
import json
from pathlib import Path
from typing import Dict, Any, Optional
from dataclasses import dataclass, field
from .logging_config import logger

@dataclass
class APIConfig:
    """API configuration with validation."""
    nebius_api_key: str = ""
    nebius_base_url: str = "https://api.studio.nebius.ai/v1/"
    tavily_api_key: str = ""

    # API-specific settings
    nebius_model: str = "meta-llama/Meta-Llama-3.1-8B-Instruct"
    nebius_max_tokens: int = 1000
    nebius_temperature: float = 0.7

    tavily_search_depth: str = "basic"
    tavily_max_results: int = 5

    def __post_init__(self):
        """Validate configuration after initialization."""
        if not self.nebius_api_key:
            raise ValueError("NEBIUS_API_KEY is required")
        if not self.tavily_api_key:
            raise ValueError("TAVILY_API_KEY is required")

        # Validate numeric ranges
        if not 0.0 <= self.nebius_temperature <= 2.0:
            raise ValueError("nebius_temperature must be between 0.0 and 2.0")
        if self.nebius_max_tokens <= 0:
            raise ValueError("nebius_max_tokens must be positive")
        if self.tavily_max_results <= 0:
            raise ValueError("tavily_max_results must be positive")

@dataclass
class AppConfig:
    """Application configuration."""
    environment: str = "development"  # development, staging, production
    debug: bool = True
    log_level: str = "INFO"

    # Gradio settings
    gradio_server_name: str = "0.0.0.0"
    gradio_server_port: int = 7860
    gradio_share: bool = False
    gradio_auth: Optional[tuple] = None

    # Performance settings
    max_search_results: int = 10
    max_sub_questions: int = 5
    cache_ttl_seconds: int = 3600
    request_timeout_seconds: int = 30

    # Rate limiting
    api_calls_per_second: float = 2.0
    api_burst_size: int = 5

    # Circuit breaker settings
    circuit_breaker_failure_threshold: int = 5
    circuit_breaker_timeout_seconds: int = 60

    # Monitoring settings
    metrics_retention_hours: int = 24
    health_check_interval_seconds: int = 300  # 5 minutes

    def __post_init__(self):
        """Validate application configuration."""
        valid_environments = ["development", "staging", "production"]
        if self.environment not in valid_environments:
            raise ValueError(f"environment must be one of: {valid_environments}")

        valid_log_levels = ["DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"]
        if self.log_level not in valid_log_levels:
            raise ValueError(f"log_level must be one of: {valid_log_levels}")

        if self.gradio_server_port <= 0 or self.gradio_server_port > 65535:
            raise ValueError("gradio_server_port must be between 1 and 65535")

@dataclass
class SecurityConfig:
    """Security configuration."""
    enable_authentication: bool = False
    allowed_origins: list = field(default_factory=lambda: ["*"])
    api_key_header: str = "X-API-Key"
    rate_limit_per_ip: int = 100  # requests per hour
    max_request_size_mb: int = 10

    # Content filtering
    enable_content_filtering: bool = True
    blocked_patterns: list = field(default_factory=list)

    def __post_init__(self):
        """Validate security configuration."""
        if self.rate_limit_per_ip <= 0:
            raise ValueError("rate_limit_per_ip must be positive")
        if self.max_request_size_mb <= 0:
            raise ValueError("max_request_size_mb must be positive")

class ConfigManager:
    """Centralized configuration management with environment-specific overrides."""

    def __init__(self, config_dir: str = "config"):
        """
        Initialize configuration manager.

        Args:
            config_dir: Directory containing configuration files
        """
        self.config_dir = Path(config_dir)
        self.config_dir.mkdir(exist_ok=True)

        # Load environment variables
        self._load_environment_variables()

        # Initialize configurations
        self.api_config = self._load_api_config()
        self.app_config = self._load_app_config()
        self.security_config = self._load_security_config()

        logger.info(f"Configuration loaded for environment: {self.app_config.environment}")

    def _load_environment_variables(self):
        """Load environment variables from .env file if it exists."""
        env_file = Path(".env")
        if env_file.exists():
            from dotenv import load_dotenv
            load_dotenv()
            logger.info("Loaded environment variables from .env file")

    def _load_api_config(self) -> APIConfig:
        """Load API configuration from environment and config files."""
        # Start with environment variables
        config_data = {
            "nebius_api_key": os.getenv("NEBIUS_API_KEY", ""),
            "nebius_base_url": os.getenv("NEBIUS_BASE_URL", "https://api.studio.nebius.ai/v1/"),
            "tavily_api_key": os.getenv("TAVILY_API_KEY", ""),
            "nebius_model": os.getenv("NEBIUS_MODEL", "meta-llama/Meta-Llama-3.1-8B-Instruct"),
            "nebius_max_tokens": int(os.getenv("NEBIUS_MAX_TOKENS", "1000")),
            "nebius_temperature": float(os.getenv("NEBIUS_TEMPERATURE", "0.7")),
            "tavily_search_depth": os.getenv("TAVILY_SEARCH_DEPTH", "basic"),
            "tavily_max_results": int(os.getenv("TAVILY_MAX_RESULTS", "5"))
        }

        # Override with config file if it exists
        config_file = self.config_dir / "api_config.json"
        if config_file.exists():
            try:
                with open(config_file, 'r') as f:
                    file_config = json.load(f)
                config_data.update(file_config)
                logger.info("Loaded API configuration from config file")
            except Exception as e:
                logger.warning(f"Failed to load API config file: {e}")

        return APIConfig(**config_data)

    def _load_app_config(self) -> AppConfig:
        """Load application configuration."""
        environment = os.getenv("ENVIRONMENT", "development")

        # Base configuration
        config_data = {
            "environment": environment,
            "debug": environment == "development",
            "log_level": os.getenv("LOG_LEVEL", "INFO"),
            "gradio_server_name": os.getenv("GRADIO_SERVER_NAME", "0.0.0.0"),
            "gradio_server_port": int(os.getenv("GRADIO_SERVER_PORT", "7860")),
            "gradio_share": os.getenv("GRADIO_SHARE", "false").lower() == "true",
            "max_search_results": int(os.getenv("MAX_SEARCH_RESULTS", "10")),
            "max_sub_questions": int(os.getenv("MAX_SUB_QUESTIONS", "5")),
            "cache_ttl_seconds": int(os.getenv("CACHE_TTL_SECONDS", "3600")),
            "request_timeout_seconds": int(os.getenv("REQUEST_TIMEOUT_SECONDS", "30"))
        }

        # Environment-specific overrides
        env_config_file = self.config_dir / f"app_config_{environment}.json"
        if env_config_file.exists():
            try:
                with open(env_config_file, 'r') as f:
                    env_config = json.load(f)
                config_data.update(env_config)
                logger.info(f"Loaded environment-specific config: {environment}")
            except Exception as e:
                logger.warning(f"Failed to load environment config: {e}")

        return AppConfig(**config_data)

    def _load_security_config(self) -> SecurityConfig:
        """Load security configuration."""
        config_data = {
            "enable_authentication": os.getenv("ENABLE_AUTH", "false").lower() == "true",
            "rate_limit_per_ip": int(os.getenv("RATE_LIMIT_PER_IP", "100")),
            "max_request_size_mb": int(os.getenv("MAX_REQUEST_SIZE_MB", "10")),
            "enable_content_filtering": os.getenv("ENABLE_CONTENT_FILTERING", "true").lower() == "true"
        }

        # Load from config file
        config_file = self.config_dir / "security_config.json"
        if config_file.exists():
            try:
                with open(config_file, 'r') as f:
                    file_config = json.load(f)
                config_data.update(file_config)
                logger.info("Loaded security configuration from config file")
            except Exception as e:
                logger.warning(f"Failed to load security config: {e}")

        return SecurityConfig(**config_data)

    def save_config_template(self):
        """Save configuration templates for easy editing."""
        templates = {
            "api_config.json": {
                "nebius_model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
                "nebius_max_tokens": 1000,
                "nebius_temperature": 0.7,
                "tavily_search_depth": "basic",
                "tavily_max_results": 5
            },
            "app_config_development.json": {
                "debug": True,
                "log_level": "DEBUG",
                "gradio_share": False,
                "max_search_results": 5
            },
            "app_config_production.json": {
                "debug": False,
                "log_level": "INFO",
                "gradio_share": False,
                "max_search_results": 10,
                "cache_ttl_seconds": 7200
            },
            "security_config.json": {
                "enable_authentication": False,
                "allowed_origins": ["*"],
                "rate_limit_per_ip": 100,
                "enable_content_filtering": True,
                "blocked_patterns": []
            }
        }

        for filename, template in templates.items():
            config_file = self.config_dir / filename
            if not config_file.exists():
                try:
                    with open(config_file, 'w') as f:
                        json.dump(template, f, indent=2)
                    logger.info(f"Created config template: {filename}")
                except Exception as e:
                    logger.error(f"Failed to create config template {filename}: {e}")

    def get_config_summary(self) -> Dict[str, Any]:
        """Get a summary of current configuration (without sensitive data)."""
        return {
            "environment": self.app_config.environment,
            "debug_mode": self.app_config.debug,
            "log_level": self.app_config.log_level,
            "gradio_port": self.app_config.gradio_server_port,
            "cache_ttl": self.app_config.cache_ttl_seconds,
            "max_search_results": self.app_config.max_search_results,
            "authentication_enabled": self.security_config.enable_authentication,
            "content_filtering_enabled": self.security_config.enable_content_filtering,
            "api_endpoints": {
                "nebius": bool(self.api_config.nebius_api_key),
                "tavily": bool(self.api_config.tavily_api_key)
            }
        }
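A minimal usage sketch for the `ConfigManager` above (not part of the upload; it assumes `NEBIUS_API_KEY` and `TAVILY_API_KEY` are set in the environment, since `APIConfig.__post_init__` raises `ValueError` otherwise):

```python
# Hypothetical driver for ConfigManager; module path and env assumptions are ours.
from mcp_hub.advanced_config import ConfigManager

manager = ConfigManager(config_dir="config")  # creates config/ if missing
manager.save_config_template()                # write editable JSON templates
print(manager.get_config_summary())           # summary without secrets
```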
mcp_hub/async_utils.py
ADDED
@@ -0,0 +1,95 @@
"""Async utilities for improved performance in concurrent operations."""

import asyncio
import aiohttp
from typing import Dict, Any, List
from concurrent.futures import ThreadPoolExecutor
from .config import api_config, app_config
from .exceptions import APIError
from .logging_config import logger

class AsyncWebSearchAgent:
    """Async version of web search for concurrent operations."""

    def __init__(self):
        self.session = None

    async def __aenter__(self):
        """Async context manager entry."""
        self.session = aiohttp.ClientSession()
        return self

    async def __aexit__(self, exc_type, exc_val, exc_tb):
        """Async context manager exit."""
        if self.session:
            await self.session.close()

    async def search_multiple_queries(self, queries: List[str]) -> List[Dict[str, Any]]:
        """Search multiple queries concurrently."""
        if not self.session:
            raise APIError("AsyncWebSearch", "Session not initialized. Use as async context manager.")

        logger.info(f"Starting concurrent search for {len(queries)} queries")

        # Create tasks for concurrent execution
        tasks = [self._search_single_query(query) for query in queries]

        # Execute all searches concurrently
        results = await asyncio.gather(*tasks, return_exceptions=True)

        # Process results and handle any exceptions
        processed_results = []
        for i, result in enumerate(results):
            if isinstance(result, Exception):
                logger.error(f"Search failed for query {i}: {str(result)}")
                processed_results.append({
                    "error": str(result),
                    "query": queries[i],
                    "results": []
                })
            else:
                processed_results.append(result)

        logger.info(f"Completed concurrent searches: {len([r for r in processed_results if not r.get('error')])} successful")
        return processed_results

    async def _search_single_query(self, query: str) -> Dict[str, Any]:
        """Search a single query using Tavily API."""
        try:
            # In a real implementation, you'd make async HTTP calls to Tavily
            # For now, we'll use the sync version in a thread pool
            from tavily import TavilyClient
            client = TavilyClient(api_key=api_config.tavily_api_key)

            # Run sync operation in thread pool
            loop = asyncio.get_event_loop()
            with ThreadPoolExecutor() as executor:
                response = await loop.run_in_executor(
                    executor,
                    lambda: client.search(
                        query=query,
                        search_depth="basic",
                        max_results=app_config.max_search_results,
                        include_answer=True
                    )
                )

            return {
                "query": response.get("query", query),
                "tavily_answer": response.get("answer"),
                "results": response.get("results", []),
                "data_source": "Tavily Search API (Async)",
            }

        except Exception as e:
            raise APIError("Tavily", f"Async search failed: {str(e)}")

async def process_subquestions_concurrently(sub_questions: List[str]) -> List[Dict[str, Any]]:
    """Process multiple sub-questions concurrently for better performance."""
    logger.info(f"Processing {len(sub_questions)} sub-questions concurrently")

    async with AsyncWebSearchAgent() as async_searcher:
        # Execute all searches concurrently
        search_results = await async_searcher.search_multiple_queries(sub_questions)

    return search_results
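A sketch of how the concurrent helper might be driven from synchronous code (the query strings are made up for illustration, and a valid `TAVILY_API_KEY` is assumed for real searches):

```python
# Illustrative driver; not part of the upload.
import asyncio
from mcp_hub.async_utils import process_subquestions_concurrently

sub_questions = [
    "What Python libraries perform sentiment analysis?",
    "How do I authenticate against the Twitter API?",
]
results = asyncio.run(process_subquestions_concurrently(sub_questions))
for r in results:
    status = "error" if r.get("error") else f"{len(r['results'])} results"
    print(r.get("query"), "->", status)
```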
mcp_hub/cache_utils.py
ADDED
@@ -0,0 +1,211 @@
"""Caching system for improved performance and reduced API calls."""

import hashlib
import json
import pickle
from datetime import datetime, timedelta
from pathlib import Path
from typing import Any, Dict, Optional, Callable
from functools import wraps
from .logging_config import logger

class CacheManager:
    """Simple file-based cache manager for API responses and computations."""

    def __init__(self, cache_dir: str = "cache", default_ttl: int = 3600):
        """
        Initialize cache manager.

        Args:
            cache_dir: Directory to store cache files
            default_ttl: Default time-to-live in seconds (1 hour default)
        """
        self.cache_dir = Path(cache_dir)
        self.cache_dir.mkdir(exist_ok=True)
        self.default_ttl = default_ttl
        logger.info(f"Cache manager initialized with directory: {self.cache_dir}")

    def _get_cache_key(self, func_name: str, args: tuple, kwargs: dict) -> str:
        """Generate a unique cache key based on function name and arguments."""
        # Create a string representation of arguments
        key_data = {
            "func": func_name,
            "args": args,
            "kwargs": kwargs
        }
        key_string = json.dumps(key_data, sort_keys=True, default=str)
        return hashlib.md5(key_string.encode()).hexdigest()

    def _get_cache_path(self, cache_key: str) -> Path:
        """Get the file path for a cache key."""
        return self.cache_dir / f"{cache_key}.cache"

    def get(self, cache_key: str) -> Optional[Any]:
        """Retrieve a value from cache if it exists and is not expired."""
        cache_path = self._get_cache_path(cache_key)

        if not cache_path.exists():
            return None

        try:
            with open(cache_path, 'rb') as f:
                cache_data = pickle.load(f)

            # Check if cache has expired
            if datetime.now() > cache_data['expires_at']:
                logger.debug(f"Cache expired for key: {cache_key}")
                cache_path.unlink()  # Delete expired cache
                return None

            logger.debug(f"Cache hit for key: {cache_key}")
            return cache_data['value']

        except (EOFError, pickle.PickleError, KeyError) as e:
            logger.warning(f"Cache corruption for key {cache_key}: {e}")
            cache_path.unlink()  # Delete corrupted cache
            return None

    def set(self, cache_key: str, value: Any, ttl: Optional[int] = None) -> None:
        """Store a value in cache with optional TTL."""
        if ttl is None:
            ttl = self.default_ttl

        cache_data = {
            'value': value,
            'created_at': datetime.now(),
            'expires_at': datetime.now() + timedelta(seconds=ttl)
        }

        cache_path = self._get_cache_path(cache_key)

        try:
            with open(cache_path, 'wb') as f:
                pickle.dump(cache_data, f)
            logger.debug(f"Cached value for key: {cache_key} (TTL: {ttl}s)")
        except Exception as e:
            logger.error(f"Failed to cache value for key {cache_key}: {e}")

    def cached_call(self, func: Callable, args: tuple, kwargs: dict, ttl: Optional[int] = None) -> Any:
        """Make a cached function call."""
        cache_key = self._get_cache_key(func.__name__, args, kwargs)

        # Try to get from cache first
        cached_result = self.get(cache_key)
        if cached_result is not None:
            return cached_result

        # Execute function and cache result
        logger.debug(f"Cache miss for {func.__name__}, executing function")
        result = func(*args, **kwargs)
        self.set(cache_key, result, ttl)

        return result

    def clear_expired(self) -> int:
        """Remove all expired cache files and return count of removed files."""
        removed_count = 0
        current_time = datetime.now()

        for cache_file in self.cache_dir.glob("*.cache"):
            try:
                with open(cache_file, 'rb') as f:
                    cache_data = pickle.load(f)

                if current_time > cache_data['expires_at']:
                    cache_file.unlink()
                    removed_count += 1

            except Exception as e:
                logger.warning(f"Error checking cache file {cache_file}: {e}")
                cache_file.unlink()  # Remove corrupted files
                removed_count += 1

        if removed_count > 0:
            logger.info(f"Removed {removed_count} expired cache files")

        return removed_count

    def clear_all(self) -> int:
        """Remove all cache files and return count of removed files."""
        removed_count = 0
        for cache_file in self.cache_dir.glob("*.cache"):
            cache_file.unlink()
            removed_count += 1

        logger.info(f"Cleared all cache: removed {removed_count} files")
        return removed_count

    def get_cache_status(self) -> Dict[str, Any]:
        """Get detailed status information about the cache system."""
        try:
            # Count cache files
            cache_files = list(self.cache_dir.glob("*.cache"))
            cache_count = len(cache_files)

            # Calculate cache directory size
            total_size = sum(f.stat().st_size for f in cache_files)

            # Count expired files
            expired_count = 0
            current_time = datetime.now()
            for cache_file in cache_files:
                try:
                    with open(cache_file, 'rb') as f:
                        cache_data = pickle.load(f)

                    if current_time > cache_data['expires_at']:
                        expired_count += 1
                except Exception:
                    expired_count += 1  # Count corrupted files as expired

            # Get cache stats
            return {
                "status": "healthy",
                "cache_dir": str(self.cache_dir),
                "total_files": cache_count,
                "expired_files": expired_count,
                "total_size_bytes": total_size,
                "total_size_mb": round(total_size / (1024 * 1024), 2),
                "default_ttl_seconds": self.default_ttl,
                "timestamp": datetime.now().isoformat()
            }
        except Exception as e:
            logger.error(f"Failed to get cache status: {str(e)}")
            return {
                "status": "error",
                "error": str(e),
                "timestamp": datetime.now().isoformat()
            }

# Global cache manager instance
cache_manager = CacheManager()

def cached(ttl: int = 3600):
    """
    Decorator to cache function results.

    Args:
        ttl: Time-to-live in seconds
    """
    def decorator(func: Callable):
        @wraps(func)
        def wrapper(*args, **kwargs):
            return cache_manager.cached_call(func, args, kwargs, ttl)
        return wrapper
    return decorator

# Specialized caching functions for common operations
@cached(ttl=1800)  # 30 minutes
def cached_web_search(query: str) -> Dict[str, Any]:
    """Cached version of web search - import happens at runtime."""
    # Import at runtime to avoid circular imports
    from tavily import TavilyClient
    client = TavilyClient(api_key="placeholder")  # Will be replaced at runtime
    # This is a placeholder - actual implementation would use the real agent
    return {"query": query, "results": [], "cached": True}

@cached(ttl=3600)  # 1 hour
def cached_llm_processing(text_input: str, task: str, context: Optional[str] = None) -> Dict[str, Any]:
    """Cached version of LLM processing - import happens at runtime."""
    # This is a placeholder for the caching pattern
    return {"input_text": text_input, "task": task, "cached": True}
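The `@cached` decorator above is generic; a hypothetical example of wrapping an expensive call (`expensive_lookup` is a stand-in of ours, not part of the project):

```python
# Sketch of the @cached pattern from mcp_hub.cache_utils.
from mcp_hub.cache_utils import cached, cache_manager

@cached(ttl=600)  # results reused for 10 minutes
def expensive_lookup(term: str) -> dict:
    # stand-in for a slow, deterministic API call
    return {"term": term, "hits": len(term)}

print(expensive_lookup("modal sandbox"))  # executes and writes a .cache file
print(expensive_lookup("modal sandbox"))  # served from the file cache
print(cache_manager.clear_expired(), "expired entries removed")
```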
mcp_hub/config.py
ADDED
@@ -0,0 +1,120 @@
"""Configuration management for the MCP Hub project."""

import os
from dataclasses import dataclass
from dotenv import load_dotenv

# Load environment variables
load_dotenv()

@dataclass
class APIConfig:
    """API configuration settings."""
    # Provider selection
    llm_provider: str = "nebius"  # Options: "nebius", "openai", "anthropic", "huggingface"

    # Provider API keys
    nebius_api_key: str = ""
    openai_api_key: str = ""
    anthropic_api_key: str = ""
    huggingface_api_key: str = ""

    # Other APIs
    tavily_api_key: str = ""

    # Provider URLs
    nebius_base_url: str = "https://api.studio.nebius.com/v1/"
    huggingface_base_url: str = "https://api-inference.huggingface.co"

    # Other settings
    current_year: str = "2025"

    def __post_init__(self):
        """Validate required API keys based on selected provider."""
        # Always require Tavily for search functionality
        if not self.tavily_api_key or not self.tavily_api_key.startswith("tvly-"):
            raise RuntimeError("A valid TAVILY_API_KEY is required in your .env file.")

        # Validate LLM provider selection
        valid_providers = ["nebius", "openai", "anthropic", "huggingface"]
        if self.llm_provider not in valid_providers:
            raise RuntimeError(f"LLM_PROVIDER must be one of: {', '.join(valid_providers)}")

        # Validate required API key for selected provider
        if self.llm_provider == "nebius" and not self.nebius_api_key:
            raise RuntimeError("NEBIUS_API_KEY is required when using nebius provider.")
        elif self.llm_provider == "openai" and not self.openai_api_key:
            raise RuntimeError("OPENAI_API_KEY is required when using openai provider.")
        elif self.llm_provider == "anthropic" and not self.anthropic_api_key:
            raise RuntimeError("ANTHROPIC_API_KEY is required when using anthropic provider.")
        elif self.llm_provider == "huggingface" and not self.huggingface_api_key:
            raise RuntimeError("HUGGINGFACE_API_KEY is required when using huggingface provider.")

@dataclass
class ModelConfig:
    """Model configuration settings."""
    # Default models (Nebius/HuggingFace compatible)
    question_enhancer_model: str = "Qwen/Qwen3-4B-fast"
    llm_processor_model: str = "meta-llama/Meta-Llama-3.1-8B-Instruct"
    code_generator_model: str = "Qwen/Qwen2.5-Coder-32B-Instruct-fast"
    orchestrator_model: str = "Qwen/Qwen3-32B-fast"

    def get_model_for_provider(self, task: str, provider: str) -> str:
        """Get appropriate model for the given task and provider."""

        # Model mappings by provider
        provider_models = {
            "nebius": {
                "question_enhancer": self.question_enhancer_model,
                "llm_processor": self.llm_processor_model,
                "code_generator": self.code_generator_model,
                "orchestrator": self.orchestrator_model,
            },
            "openai": {
                "question_enhancer": "gpt-4.1-nano",
                "llm_processor": "gpt-4.1-nano",
                "code_generator": "gpt-4.1",
                "orchestrator": "gpt-4.1",
            },
            "anthropic": {
                "question_enhancer": "claude-3-5-haiku-latest",
                "llm_processor": "claude-3-5-sonnet-latest",
                "code_generator": "claude-sonnet-4-0",
                "orchestrator": "claude-sonnet-4-0",
            },
            "huggingface": {
                "question_enhancer": "microsoft/phi-4",
                "llm_processor": "microsoft/phi-4",
                "code_generator": "Qwen/Qwen2.5-Coder-32B-Instruct",
                "orchestrator": "microsoft/phi-4",
            }
        }

        if provider not in provider_models:
            # Fall back to default models
            return getattr(self, f"{task}_model", self.llm_processor_model)

        return provider_models[provider].get(task, provider_models[provider]["llm_processor"])

@dataclass
class AppConfig:
    """Application configuration settings."""
    modal_app_name: str = "my-sandbox-app"
    max_search_results: int = 2
    max_code_generation_attempts: int = 3
    llm_temperature: float = 0.6
    code_gen_temperature: float = 0.1

# Create global configuration instances
api_config = APIConfig(
    llm_provider=os.environ.get("LLM_PROVIDER", "nebius"),
    nebius_api_key=os.environ.get("NEBIUS_API_KEY", ""),
    openai_api_key=os.environ.get("OPENAI_API_KEY", ""),
    anthropic_api_key=os.environ.get("ANTHROPIC_API_KEY", ""),
    huggingface_api_key=os.environ.get("HUGGINGFACE_API_KEY", ""),
    tavily_api_key=os.environ.get("TAVILY_API_KEY", ""),
    current_year=os.environ.get("CURRENT_YEAR", "2025")
)

model_config = ModelConfig()
app_config = AppConfig()
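A sketch of provider-aware model selection with the globals defined above (note that importing `mcp_hub.config` instantiates and validates `api_config` at import time, so the required keys must already be set):

```python
# Illustrative only; assumes LLM_PROVIDER and the matching API key are configured.
from mcp_hub.config import api_config, model_config

model = model_config.get_model_for_provider("code_generator", api_config.llm_provider)
print(f"code generation uses {model} via {api_config.llm_provider}")
# Unknown providers fall back to the Nebius-compatible defaults.
```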
mcp_hub/exceptions.py
ADDED
@@ -0,0 +1,28 @@
"""Custom exception classes for the MCP Hub project."""

class MCPHubError(Exception):
    """Base exception class for MCP Hub errors."""
    pass

class APIError(MCPHubError):
    """Raised when API calls fail."""
    def __init__(self, service: str, message: str):
        self.service = service
        self.message = message
        super().__init__(f"{service} API Error: {message}")

class ConfigurationError(MCPHubError):
    """Raised when there are configuration issues."""
    pass

class ValidationError(MCPHubError):
    """Raised when input validation fails."""
    pass

class CodeGenerationError(MCPHubError):
    """Raised when code generation fails."""
    pass

class CodeExecutionError(MCPHubError):
    """Raised when code execution fails."""
    pass
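Because every domain error derives from `MCPHubError`, callers can catch broadly while `APIError` still carries the failing service's name; a small sketch:

```python
# Illustrative handling of the exception hierarchy above.
from mcp_hub.exceptions import MCPHubError, APIError

try:
    raise APIError("Tavily", "rate limit exceeded")
except APIError as e:
    print(e.service, "-", e.message)  # Tavily - rate limit exceeded
except MCPHubError:
    print("some other hub error")
```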
mcp_hub/health_monitoring.py
ADDED
@@ -0,0 +1,261 @@
"""System health monitoring and status dashboard functionality."""

import time
import psutil
from datetime import datetime
from typing import Dict, Any
from .config import api_config
from .logging_config import logger
from .reliability_utils import health_monitor
from .performance_monitoring import metrics_collector

class SystemHealthChecker:
    """Comprehensive system health checking."""

    def __init__(self):
        self.last_check = None
        self.health_status = {}

    def check_api_connectivity(self) -> Dict[str, Any]:
        """Check connectivity to external APIs."""
        results = {}

        # Check Nebius API
        try:
            from openai import OpenAI
            client = OpenAI(
                api_key=api_config.nebius_api_key,
                base_url=api_config.nebius_base_url
            )

            start_time = time.time()
            # Make a minimal test call
            response = client.chat.completions.create(
                model="meta-llama/Meta-Llama-3.1-8B-Instruct",
                messages=[{"role": "user", "content": "test"}],
                max_tokens=1
            )
            response_time = time.time() - start_time

            results["nebius"] = {
                "status": "healthy",
                "response_time_ms": response_time * 1000,
                "last_checked": datetime.now().isoformat()
            }

        except Exception as e:
            results["nebius"] = {
                "status": "unhealthy",
                "error": str(e),
                "last_checked": datetime.now().isoformat()
            }

        # Check Tavily API
        try:
            from tavily import TavilyClient
            client = TavilyClient(api_key=api_config.tavily_api_key)

            start_time = time.time()
            # Make a minimal test search
            response = client.search(query="test", max_results=1)
            response_time = time.time() - start_time

            results["tavily"] = {
                "status": "healthy",
                "response_time_ms": response_time * 1000,
                "last_checked": datetime.now().isoformat()
            }

        except Exception as e:
            results["tavily"] = {
                "status": "unhealthy",
                "error": str(e),
                "last_checked": datetime.now().isoformat()
            }

        return results

    def check_system_resources(self) -> Dict[str, Any]:
        """Check system resource usage."""
        try:
            # CPU usage
            cpu_percent = psutil.cpu_percent(interval=1)

            # Memory usage
            memory = psutil.virtual_memory()

            # Disk usage
            disk = psutil.disk_usage('/')

            # Process-specific metrics
            process = psutil.Process()
            process_memory = process.memory_info()

            return {
                "cpu_percent": cpu_percent,
                "memory": {
                    "total_gb": memory.total / (1024**3),
                    "available_gb": memory.available / (1024**3),
                    "percent_used": memory.percent
                },
                "disk": {
                    "total_gb": disk.total / (1024**3),
                    "free_gb": disk.free / (1024**3),
                    "percent_used": (disk.used / disk.total) * 100
                },
                "process": {
                    "memory_mb": process_memory.rss / (1024**2),
                    "cpu_percent": process.cpu_percent()
                },
                "status": "healthy",
                "last_checked": datetime.now().isoformat()
            }

        except Exception as e:
            return {
                "status": "unhealthy",
                "error": str(e),
                "last_checked": datetime.now().isoformat()
            }

    def check_cache_health(self) -> Dict[str, Any]:
        """Check cache system health."""
        try:
            from .cache_utils import cache_manager

            # Count cache files
            cache_files = list(cache_manager.cache_dir.glob("*.cache"))

            # Calculate cache directory size
            total_size = sum(f.stat().st_size for f in cache_files)

            return {
                "cache_files_count": len(cache_files),
                "total_size_mb": total_size / (1024**2),
                "cache_directory": str(cache_manager.cache_dir),
                "status": "healthy",
                "last_checked": datetime.now().isoformat()
            }

        except Exception as e:
            return {
                "status": "unhealthy",
                "error": str(e),
                "last_checked": datetime.now().isoformat()
            }

    def get_comprehensive_health_report(self) -> Dict[str, Any]:
        """Get a comprehensive health report of the entire system."""
        logger.info("Generating comprehensive health report")

        report = {
            "timestamp": datetime.now().isoformat(),
            "overall_status": "healthy"  # Will be updated based on checks
        }

        # Check API connectivity
        api_health = self.check_api_connectivity()
        report["api_connectivity"] = api_health

        # Check system resources
        system_health = self.check_system_resources()
        report["system_resources"] = system_health

        # Check cache health
        cache_health = self.check_cache_health()
        report["cache_system"] = cache_health

        # Get API health stats from monitor
        try:
            nebius_stats = health_monitor.get_health_stats("nebius")
            tavily_stats = health_monitor.get_health_stats("tavily")

            report["api_performance"] = {
                "nebius": nebius_stats,
                "tavily": tavily_stats
            }
        except Exception as e:
            report["api_performance"] = {"error": str(e)}

        # Get performance metrics
        try:
            performance_summary = metrics_collector.get_metrics_summary()
            report["performance_metrics"] = performance_summary
        except Exception as e:
            report["performance_metrics"] = {"error": str(e)}

        # Determine overall status
        unhealthy_components = []

        for service, status in api_health.items():
            if status.get("status") == "unhealthy":
                unhealthy_components.append(f"API:{service}")

        if system_health.get("status") == "unhealthy":
            unhealthy_components.append("system_resources")

        if cache_health.get("status") == "unhealthy":
            unhealthy_components.append("cache_system")

        if unhealthy_components:
            report["overall_status"] = "degraded"
            report["unhealthy_components"] = unhealthy_components

        self.last_check = datetime.now()
        self.health_status = report

        logger.info(f"Health report generated: {report['overall_status']}")
        return report

# Global health checker instance
health_checker = SystemHealthChecker()

def create_health_dashboard() -> str:
    """Create a formatted health dashboard for display."""
    report = health_checker.get_comprehensive_health_report()

    dashboard = f"""
# 🏥 System Health Dashboard
**Last Updated:** {report['timestamp']}
**Overall Status:** {'🟢' if report['overall_status'] == 'healthy' else '🟡' if report['overall_status'] == 'degraded' else '🔴'} {report['overall_status'].upper()}

## 🌐 API Connectivity
"""

    for service, status in report.get("api_connectivity", {}).items():
        status_icon = "🟢" if status.get("status") == "healthy" else "🔴"
        response_time = status.get("response_time_ms", 0)
        dashboard += f"- **{service.title()}:** {status_icon} {status.get('status', 'unknown')} ({response_time:.1f}ms)\n"

    dashboard += "\n## 💻 System Resources\n"
    sys_resources = report.get("system_resources", {})
    if "memory" in sys_resources:
        memory = sys_resources["memory"]
        dashboard += f"- **Memory:** {memory['percent_used']:.1f}% used ({memory['available_gb']:.1f}GB available)\n"

    if "cpu_percent" in sys_resources:
        dashboard += f"- **CPU:** {sys_resources['cpu_percent']:.1f}% usage\n"

    if "process" in sys_resources:
        process = sys_resources["process"]
        dashboard += f"- **Process Memory:** {process['memory_mb']:.1f}MB\n"

    dashboard += "\n## 📊 Performance Metrics\n"
    perf_metrics = report.get("performance_metrics", {})
    if perf_metrics and not perf_metrics.get("error"):
        for metric_name, metric_data in perf_metrics.items():
            if isinstance(metric_data, dict) and "average" in metric_data:
                dashboard += f"- **{metric_name}:** Avg: {metric_data['average']:.3f}, Count: {metric_data['count']}\n"

    dashboard += "\n## 🔧 Cache System\n"
    cache_info = report.get("cache_system", {})
    if cache_info.get("status") == "healthy":
        dashboard += f"- **Cache Files:** {cache_info.get('cache_files_count', 0)} files\n"
|
254 |
+
dashboard += f"- **Cache Size:** {cache_info.get('total_size_mb', 0):.1f}MB\n"
|
255 |
+
|
256 |
+
if report.get("unhealthy_components"):
|
257 |
+
dashboard += "\n## ⚠️ Issues Detected\n"
|
258 |
+
for component in report["unhealthy_components"]:
|
259 |
+
dashboard += f"- {component}\n"
|
260 |
+
|
261 |
+
return dashboard
|
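The dashboard builder above returns plain Markdown, so any UI layer can render it directly. A minimal usage sketch, assuming only what this file exports (the import path follows the repo layout):

from mcp_hub.health_monitoring import health_checker, create_health_dashboard

# One-off report for programmatic checks
report = health_checker.get_comprehensive_health_report()
print(report["overall_status"])   # "healthy" or "degraded"

# Formatted Markdown dashboard for display
print(create_health_dashboard())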
mcp_hub/logging_config.py
ADDED
@@ -0,0 +1,51 @@
"""Logging configuration for the MCP Hub project."""

import logging
import sys
from datetime import datetime
from pathlib import Path

def setup_logging(
    log_level: str = "INFO",
    log_to_file: bool = True,
    log_dir: str = "logs"
) -> logging.Logger:
    """Set up logging configuration."""

    # Create logs directory if it doesn't exist
    if log_to_file:
        log_path = Path(log_dir)
        log_path.mkdir(exist_ok=True)

    # Create logger
    logger = logging.getLogger("mcp_hub")
    logger.setLevel(getattr(logging, log_level.upper()))

    # Clear any existing handlers
    logger.handlers = []

    # Create formatter
    formatter = logging.Formatter(
        "%(asctime)s - %(name)s - %(levelname)s - %(funcName)s:%(lineno)d - %(message)s"
    )

    # Console handler
    console_handler = logging.StreamHandler(sys.stdout)
    console_handler.setLevel(getattr(logging, log_level.upper()))
    console_handler.setFormatter(formatter)
    logger.addHandler(console_handler)

    # File handler
    if log_to_file:
        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        file_handler = logging.FileHandler(
            log_path / f"mcp_hub_{timestamp}.log"
        )
        file_handler.setLevel(logging.DEBUG)
        file_handler.setFormatter(formatter)
        logger.addHandler(file_handler)

    return logger

# Create global logger instance
logger = setup_logging()
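The module builds a global logger on import with the defaults above; callers that need different behavior can rerun setup_logging. A minimal sketch:

from mcp_hub.logging_config import setup_logging

# Console-only DEBUG logging; reconfigures the shared "mcp_hub" logger in place
logger = setup_logging(log_level="DEBUG", log_to_file=False)
logger.debug("verbose console-only message")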
mcp_hub/package_utils.py
ADDED
@@ -0,0 +1,192 @@
"""
Package management utilities for dynamic package installation in Modal sandboxes.
This module provides functions to analyze code for imports and manage package installation.
"""
import ast
import re
from typing import Set, List

try:
    from mcp_hub.logging_config import logger
except ImportError:
    # Fallback logger for testing/standalone use
    import logging
    logger = logging.getLogger(__name__)


# Core packages that should be preinstalled in the base image
CORE_PREINSTALLED_PACKAGES = {
    "numpy", "pandas", "matplotlib", "requests", "json", "os", "sys",
    "time", "datetime", "math", "random", "collections", "itertools",
    "functools", "re", "urllib", "csv", "sqlite3", "pathlib", "typing",
    "asyncio", "threading", "multiprocessing", "subprocess", "shutil",
    "tempfile", "io", "gzip", "zipfile", "tarfile", "base64", "hashlib",
    "secrets", "uuid", "pickle", "copy", "operator", "bisect", "heapq",
    "contextlib", "weakref", "gc", "inspect", "types", "enum", "dataclasses",
    "decimal", "fractions", "statistics", "string", "textwrap", "locale",
    "calendar", "timeit", "argparse", "getopt", "logging", "warnings",
    "platform", "signal", "errno", "ctypes", "struct", "array", "queue",
    "socketserver", "http", "urllib2", "html", "xml", "email", "mailbox"
}

# Extended packages that can be dynamically installed
COMMON_PACKAGES = {
    "scikit-learn": "sklearn",
    "beautifulsoup4": "bs4",
    "pillow": "PIL",
    "opencv-python-headless": "cv2",
    "python-dateutil": "dateutil",
    "plotly": "plotly",
    "seaborn": "seaborn",
    "polars": "polars",
    "lightgbm": "lightgbm",
    "xgboost": "xgboost",
    "flask": "flask",
    "fastapi": "fastapi",
    "httpx": "httpx",
    "networkx": "networkx",
    "wordcloud": "wordcloud",
    "textblob": "textblob",
    "spacy": "spacy",
    "nltk": "nltk"
}

# Map import names to package names
IMPORT_TO_PACKAGE = {v: k for k, v in COMMON_PACKAGES.items()}
IMPORT_TO_PACKAGE.update({k: k for k in COMMON_PACKAGES.keys()})


def extract_imports_from_code(code_str: str) -> Set[str]:
    """
    Extract all import statements from Python code using AST parsing.

    Args:
        code_str: The Python code to analyze

    Returns:
        Set of imported module names (top-level only)
    """
    imports = set()

    try:
        tree = ast.parse(code_str)
        for node in ast.walk(tree):
            if isinstance(node, ast.Import):
                for alias in node.names:
                    # Get top-level module name
                    module_name = alias.name.split('.')[0]
                    imports.add(module_name)
            elif isinstance(node, ast.ImportFrom):
                if node.module:
                    # Get top-level module name
                    module_name = node.module.split('.')[0]
                    imports.add(module_name)
    except Exception as e:
        logger.warning(f"Failed to parse code with AST, falling back to regex: {e}")
        # Fallback to regex-based extraction
        imports.update(extract_imports_with_regex(code_str))

    return imports


def extract_imports_with_regex(code_str: str) -> Set[str]:
    """
    Fallback method to extract imports using regex patterns.

    Args:
        code_str: The Python code to analyze

    Returns:
        Set of imported module names
    """
    imports = set()

    # Pattern for "import module" statements
    import_pattern = r'^import\s+([a-zA-Z_][a-zA-Z0-9_]*(?:\.[a-zA-Z_][a-zA-Z0-9_]*)*)'

    # Pattern for "from module import ..." statements
    from_pattern = r'^from\s+([a-zA-Z_][a-zA-Z0-9_]*(?:\.[a-zA-Z_][a-zA-Z0-9_]*)*)\s+import'

    for line in code_str.split('\n'):
        line = line.strip()
        if not line or line.startswith('#'):
            continue

        # Check for import statements
        import_match = re.match(import_pattern, line)
        if import_match:
            module_name = import_match.group(1).split('.')[0]
            imports.add(module_name)
            continue

        # Check for from...import statements
        from_match = re.match(from_pattern, line)
        if from_match:
            module_name = from_match.group(1).split('.')[0]
            imports.add(module_name)

    return imports
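Both extractors return top-level module names only ("sklearn", never "sklearn.linear_model"). A quick illustrative call using the functions defined above (the sample string is invented):

sample = "import numpy as np\nfrom sklearn.linear_model import LinearRegression"
print(extract_imports_from_code(sample))  # {'numpy', 'sklearn'}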
def get_packages_to_install(detected_imports: Set[str]) -> List[str]:
    """
    Determine which packages need to be installed based on detected imports.

    Args:
        detected_imports: Set of module names found in the code

    Returns:
        List of package names that need to be pip installed
    """
    packages_to_install = []

    for import_name in detected_imports:
        # Skip if it's a core preinstalled package
        if import_name in CORE_PREINSTALLED_PACKAGES:
            continue

        # Check if we have a known package mapping
        if import_name in IMPORT_TO_PACKAGE:
            package_name = IMPORT_TO_PACKAGE[import_name]
            packages_to_install.append(package_name)
        # For unknown packages, assume package name matches import name
        else:
            packages_to_install.append(import_name)

    return packages_to_install


def get_warmup_import_commands() -> List[str]:
    """
    Get list of import commands to run during sandbox warmup.

    Returns:
        List of Python import statements for core packages
    """
    core_imports = [
        "import numpy",
        "import pandas",
        "import matplotlib.pyplot",
        "import requests",
        "print('Core packages warmed up successfully')"
    ]

    return core_imports


def create_package_install_command(packages: List[str]) -> str:
    """
    Create a pip install command for the given packages.

    Args:
        packages: List of package names to install

    Returns:
        Pip install command string
    """
    if not packages:
        return ""

    # Remove duplicates and sort
    unique_packages = sorted(set(packages))
    return f"pip install {' '.join(unique_packages)}"
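Putting the three helpers together gives the full detect-and-install pipeline. A minimal sketch (the code snippet being analyzed is invented):

from mcp_hub.package_utils import (
    extract_imports_from_code,
    get_packages_to_install,
    create_package_install_command,
)

snippet = "import seaborn\nfrom bs4 import BeautifulSoup\nimport os"
imports = extract_imports_from_code(snippet)     # {'seaborn', 'bs4', 'os'}
packages = get_packages_to_install(imports)      # 'os' is preinstalled; 'bs4' maps to 'beautifulsoup4'
print(create_package_install_command(packages))  # pip install beautifulsoup4 seaborn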
mcp_hub/performance_monitoring.py
ADDED
@@ -0,0 +1,232 @@
"""Performance monitoring and metrics collection for the MCP Hub."""

import time
import psutil
import threading
from datetime import datetime, timedelta
from typing import Dict, Any, Optional
from collections import defaultdict, deque
from dataclasses import dataclass
from contextlib import contextmanager
from .logging_config import logger

@dataclass
class MetricPoint:
    """Single metric measurement."""
    timestamp: datetime
    metric_name: str
    value: float
    tags: Dict[str, str]

class MetricsCollector:
    """Collects and stores application metrics."""

    def __init__(self, max_points: int = 10000):
        """
        Initialize metrics collector.

        Args:
            max_points: Maximum number of metric points to store
        """
        self.max_points = max_points
        self.metrics = defaultdict(lambda: deque(maxlen=max_points))
        self.lock = threading.Lock()
        self.counters = defaultdict(int)
        self.timers = {}

        # Start system metrics collection thread
        self.system_thread = threading.Thread(target=self._collect_system_metrics, daemon=True)
        self.system_thread.start()
        logger.info("Metrics collector initialized")

    def record_metric(self, name: str, value: float, tags: Optional[Dict[str, str]] = None):
        """Record a metric value."""
        if tags is None:
            tags = {}

        point = MetricPoint(
            timestamp=datetime.now(),
            metric_name=name,
            value=value,
            tags=tags
        )

        with self.lock:
            self.metrics[name].append(point)

    def increment_counter(self, name: str, amount: int = 1, tags: Optional[Dict[str, str]] = None):
        """Increment a counter metric."""
        with self.lock:
            self.counters[name] += amount

        self.record_metric(f"{name}_count", self.counters[name], tags)

    @contextmanager
    def timer(self, name: str, tags: Optional[Dict[str, str]] = None):
        """Context manager for timing operations."""
        start_time = time.time()
        try:
            yield
        finally:
            duration = time.time() - start_time
            self.record_metric(f"{name}_duration_seconds", duration, tags)

    def get_metrics_summary(self,
                            metric_name: Optional[str] = None,
                            last_minutes: int = 5) -> Dict[str, Any]:
        """Get summary statistics for metrics."""
        cutoff_time = datetime.now() - timedelta(minutes=last_minutes)

        with self.lock:
            if metric_name:
                metrics_to_analyze = {metric_name: self.metrics[metric_name]}
            else:
                metrics_to_analyze = dict(self.metrics)

            summary = {}

            for name, points in metrics_to_analyze.items():
                recent_points = [p for p in points if p.timestamp >= cutoff_time]

                if not recent_points:
                    continue

                values = [p.value for p in recent_points]
                summary[name] = {
                    "count": len(values),
                    "average": sum(values) / len(values),
                    "min": min(values),
                    "max": max(values),
                    "latest": values[-1] if values else 0,
                    "last_updated": recent_points[-1].timestamp.isoformat() if recent_points else None
                }

        return summary

    def _collect_system_metrics(self):
        """Background thread to collect system metrics."""
        while True:
            try:
                # CPU and memory metrics
                cpu_percent = psutil.cpu_percent(interval=1)
                memory = psutil.virtual_memory()

                self.record_metric("system_cpu_percent", cpu_percent)
                self.record_metric("system_memory_percent", memory.percent)
                self.record_metric("system_memory_available_mb", memory.available / 1024 / 1024)

                # Process-specific metrics
                process = psutil.Process()
                process_memory = process.memory_info()

                self.record_metric("process_memory_rss_mb", process_memory.rss / 1024 / 1024)
                self.record_metric("process_cpu_percent", process.cpu_percent())

                time.sleep(30)  # Collect every 30 seconds

            except Exception as e:
                logger.error(f"Error collecting system metrics: {e}")
                time.sleep(60)  # Wait longer if there's an error
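The collector's timer context manager is the lightest way to instrument a block of code. A small sketch (note that constructing MetricsCollector starts the background sampling thread):

collector = MetricsCollector(max_points=1000)

with collector.timer("db_query", tags={"table": "users"}):
    time.sleep(0.1)  # stand-in for the timed work

print(collector.get_metrics_summary(metric_name="db_query_duration_seconds"))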
class PerformanceProfiler:
    """Profile performance of agent operations."""

    def __init__(self, metrics_collector: MetricsCollector):
        self.metrics = metrics_collector
        self.operation_stats = defaultdict(list)

    @contextmanager
    def profile_operation(self, operation_name: str, **tags):
        """Context manager to profile an operation."""
        start_time = time.time()
        start_memory = psutil.Process().memory_info().rss

        try:
            yield
            success = True
        except Exception as e:
            success = False
            logger.error(f"Operation {operation_name} failed: {e}")
            raise
        finally:
            end_time = time.time()
            end_memory = psutil.Process().memory_info().rss

            duration = end_time - start_time
            memory_delta = (end_memory - start_memory) / 1024 / 1024  # MB

            # Record metrics
            operation_tags = {"operation": operation_name, "success": str(success), **tags}
            self.metrics.record_metric("operation_duration_seconds", duration, operation_tags)
            self.metrics.record_metric("operation_memory_delta_mb", memory_delta, operation_tags)

            # Update operation stats
            self.operation_stats[operation_name].append({
                "duration": duration,
                "memory_delta": memory_delta,
                "success": success,
                "timestamp": datetime.now()
            })

    def get_operation_summary(self, operation_name: str = None) -> Dict[str, Any]:
        """Get summary of operation performance."""
        if operation_name:
            operations_to_analyze = {operation_name: self.operation_stats[operation_name]}
        else:
            operations_to_analyze = dict(self.operation_stats)

        summary = {}

        for op_name, stats in operations_to_analyze.items():
            if not stats:
                continue

            durations = [s["duration"] for s in stats]
            memory_deltas = [s["memory_delta"] for s in stats]
            success_rate = sum(1 for s in stats if s["success"]) / len(stats)

            summary[op_name] = {
                "total_calls": len(stats),
                "success_rate": success_rate,
                "avg_duration_seconds": sum(durations) / len(durations),
                "avg_memory_delta_mb": sum(memory_deltas) / len(memory_deltas),
                "min_duration": min(durations),
                "max_duration": max(durations)
            }

        return summary

# Global instances
metrics_collector = MetricsCollector()
performance_profiler = PerformanceProfiler(metrics_collector)

# Convenience decorators
def track_performance(operation_name: str = None):
    """Decorator to automatically track function performance."""
    def decorator(func):
        nonlocal operation_name
        if operation_name is None:
            operation_name = f"{func.__module__}.{func.__name__}"

        def wrapper(*args, **kwargs):
            with performance_profiler.profile_operation(operation_name):
                result = func(*args, **kwargs)
                metrics_collector.increment_counter(f"{operation_name}_calls")
                return result
        return wrapper
    return decorator

def track_api_call(service_name: str):
    """Decorator specifically for tracking API calls."""
    def decorator(func):
        def wrapper(*args, **kwargs):
            with performance_profiler.profile_operation("api_call", service=service_name):
                try:
                    result = func(*args, **kwargs)
                    metrics_collector.increment_counter("api_calls_success", tags={"service": service_name})
                    return result
                except Exception:
                    metrics_collector.increment_counter("api_calls_failed", tags={"service": service_name})
                    raise
        return wrapper
    return decorator
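A minimal sketch of both decorators with throwaway functions (do_work and fake_search are invented for illustration):

from mcp_hub.performance_monitoring import track_performance, track_api_call, metrics_collector

@track_performance("demo.do_work")
def do_work(n: int) -> int:
    return sum(i * i for i in range(n))

@track_api_call("tavily")  # failed calls would increment api_calls_failed instead
def fake_search(query: str) -> str:
    return f"results for {query}"

do_work(10_000)
fake_search("modal sandboxes")
print(metrics_collector.get_metrics_summary(last_minutes=1))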
mcp_hub/reliability_utils.py
ADDED
@@ -0,0 +1,254 @@
"""Rate limiting and circuit breaker patterns for robust API interactions."""

import time
from datetime import datetime
from typing import Callable, Any, Dict
from functools import wraps
from threading import Lock
from collections import deque
from .exceptions import APIError
from .logging_config import logger

class RateLimiter:
    """Token bucket rate limiter for API calls."""

    def __init__(self, calls_per_second: float = 1.0, burst_size: int = 5):
        """
        Initialize rate limiter.

        Args:
            calls_per_second: Maximum calls per second
            burst_size: Maximum burst of calls allowed
        """
        self.calls_per_second = calls_per_second
        self.burst_size = float(burst_size)
        self.tokens = float(burst_size)
        self.last_update = time.time()
        self.lock = Lock()

    def acquire(self, timeout: float = None) -> bool:
        """
        Acquire a token for making an API call.

        Args:
            timeout: Maximum time to wait for a token

        Returns:
            True if token acquired, False if timeout
        """
        start_time = time.time()

        while True:
            with self.lock:
                now = time.time()
                # Add tokens based on elapsed time
                time_passed = now - self.last_update
                self.tokens = min(
                    self.burst_size,
                    self.tokens + time_passed * self.calls_per_second
                )
                self.last_update = now

                if self.tokens >= 1:
                    self.tokens -= 1
                    return True

            # Check timeout
            if timeout and (time.time() - start_time) >= timeout:
                return False

            # Wait before retrying
            time.sleep(0.1)

class CircuitBreaker:
    """Circuit breaker pattern for handling API failures gracefully."""

    def __init__(
        self,
        failure_threshold: int = 5,
        timeout: int = 60,
        expected_exception: type = Exception
    ):
        """
        Initialize circuit breaker.

        Args:
            failure_threshold: Number of failures before opening circuit
            timeout: Seconds to wait before trying again
            expected_exception: Exception type that triggers circuit breaker
        """
        self.failure_threshold = failure_threshold
        self.timeout = timeout
        self.expected_exception = expected_exception

        self.failure_count = 0
        self.last_failure_time = None
        self.state = "CLOSED"  # CLOSED, OPEN, HALF_OPEN
        self.lock = Lock()

    def _can_attempt(self) -> bool:
        """Check if we can attempt the operation."""
        if self.state == "CLOSED":
            return True
        elif self.state == "OPEN":
            # total_seconds() rather than .seconds: .seconds wraps at one day,
            # which would keep the circuit open indefinitely after long gaps
            if (datetime.now() - self.last_failure_time).total_seconds() >= self.timeout:
                self.state = "HALF_OPEN"
                return True
            return False
        else:  # HALF_OPEN
            return True

    def _record_success(self):
        """Record a successful operation."""
        self.failure_count = 0
        self.state = "CLOSED"

    def _record_failure(self):
        """Record a failed operation."""
        self.failure_count += 1
        self.last_failure_time = datetime.now()

        if self.failure_count >= self.failure_threshold:
            self.state = "OPEN"
            logger.warning(f"Circuit breaker opened after {self.failure_count} failures")

    def call(self, func: Callable, *args, **kwargs) -> Any:
        """
        Execute function with circuit breaker protection.

        Args:
            func: Function to execute
            *args, **kwargs: Arguments for the function

        Returns:
            Function result

        Raises:
            APIError: If circuit is open or function fails
        """
        with self.lock:
            if not self._can_attempt():
                raise APIError(
                    "CircuitBreaker",
                    f"Circuit breaker is OPEN. Last failure: {self.last_failure_time}"
                )

        try:
            result = func(*args, **kwargs)
            with self.lock:
                self._record_success()
            return result

        except self.expected_exception as e:
            with self.lock:
                self._record_failure()
            logger.error(f"Circuit breaker recorded failure: {str(e)}")
            raise APIError("CircuitBreaker", f"Protected function failed: {str(e)}")
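To make the state machine concrete: after failure_threshold consecutive failures the breaker opens and fails fast for timeout seconds, then lets one probe through in HALF_OPEN. A toy sketch using the class above:

breaker = CircuitBreaker(failure_threshold=2, timeout=5)

def flaky():
    raise RuntimeError("boom")

for _ in range(2):
    try:
        breaker.call(flaky)  # each failure is recorded and wrapped in APIError
    except APIError:
        pass

print(breaker.state)  # "OPEN": further calls fail fast until the 5s timeout elapses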
# Global instances for different services
nebius_rate_limiter = RateLimiter(calls_per_second=2.0, burst_size=5)
tavily_rate_limiter = RateLimiter(calls_per_second=1.0, burst_size=3)

nebius_circuit_breaker = CircuitBreaker(failure_threshold=3, timeout=30)
tavily_circuit_breaker = CircuitBreaker(failure_threshold=3, timeout=30)

def rate_limited(service: str = "default", timeout: float = 10.0):
    """
    Decorator to rate limit function calls.

    Args:
        service: Service name (nebius, tavily, or default)
        timeout: Maximum time to wait for rate limit token
    """
    def decorator(func: Callable):
        @wraps(func)
        def wrapper(*args, **kwargs):
            # Select appropriate rate limiter
            if service == "nebius":
                limiter = nebius_rate_limiter
            elif service == "tavily":
                limiter = tavily_rate_limiter
            else:
                # Default limiter; note a fresh instance is created per call,
                # so "default" is effectively unthrottled across calls
                limiter = RateLimiter()

            if not limiter.acquire(timeout=timeout):
                raise APIError(service, f"Rate limit timeout after {timeout}s")

            return func(*args, **kwargs)
        return wrapper
    return decorator

def circuit_protected(service: str = "default"):
    """
    Decorator to protect function calls with circuit breaker.

    Args:
        service: Service name (nebius, tavily, or default)
    """
    def decorator(func: Callable):
        @wraps(func)
        def wrapper(*args, **kwargs):
            # Select appropriate circuit breaker
            if service == "nebius":
                breaker = nebius_circuit_breaker
            elif service == "tavily":
                breaker = tavily_circuit_breaker
            else:
                # Default breaker; a fresh instance per call never accumulates failures
                breaker = CircuitBreaker()

            return breaker.call(func, *args, **kwargs)
        return wrapper
    return decorator

class APIHealthMonitor:
    """Monitor API health and performance metrics."""

    def __init__(self, window_size: int = 100):
        """
        Initialize health monitor.

        Args:
            window_size: Number of recent calls to track
        """
        self.window_size = window_size
        self.call_history = deque(maxlen=window_size)
        self.lock = Lock()

    def record_call(self, service: str, success: bool, response_time: float):
        """Record an API call result."""
        with self.lock:
            self.call_history.append({
                "service": service,
                "success": success,
                "response_time": response_time,
                "timestamp": datetime.now()
            })

    def get_health_stats(self, service: str = None) -> Dict[str, Any]:
        """Get health statistics for a service or all services."""
        with self.lock:
            if service:
                calls = [call for call in self.call_history if call["service"] == service]
            else:
                calls = list(self.call_history)

        if not calls:
            return {"error": "No call history available"}

        total_calls = len(calls)
        successful_calls = sum(1 for call in calls if call["success"])
        success_rate = successful_calls / total_calls

        response_times = [call["response_time"] for call in calls]
        avg_response_time = sum(response_times) / len(response_times)

        return {
            "service": service or "all",
            "total_calls": total_calls,
            "success_rate": success_rate,
            "avg_response_time_ms": avg_response_time * 1000,
            "recent_failures": total_calls - successful_calls
        }

# Global health monitor
health_monitor = APIHealthMonitor()
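A minimal sketch of how an API client would stack these decorators (call_nebius is a made-up example, not a function from this repo):

from mcp_hub.reliability_utils import rate_limited, circuit_protected, health_monitor

@rate_limited(service="nebius", timeout=10.0)
@circuit_protected(service="nebius")
def call_nebius(prompt: str) -> str:
    ...  # real API call goes here

health_monitor.record_call("nebius", success=True, response_time=0.42)
print(health_monitor.get_health_stats("nebius"))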
mcp_hub/sandbox_pool.py
ADDED
@@ -0,0 +1,701 @@
"""
Warm Sandbox Pool for Modal - Async Queue-Based Implementation
This module provides a pre-warmed pool of Modal sandboxes to reduce cold-start latency.
"""
import asyncio
import time
from typing import Optional, Dict, Any
from contextlib import asynccontextmanager
from dataclasses import dataclass
from enum import Enum

import modal

from mcp_hub.logging_config import logger
from mcp_hub.exceptions import CodeExecutionError


class SandboxHealth(Enum):
    """Sandbox health status."""
    HEALTHY = "healthy"
    UNHEALTHY = "unhealthy"
    UNKNOWN = "unknown"


@dataclass
class PooledSandbox:
    """Container for a pooled sandbox with metadata."""
    sandbox: modal.Sandbox
    created_at: float
    last_used: float
    health: SandboxHealth = SandboxHealth.UNKNOWN
    use_count: int = 0


class WarmSandboxPool:
    """Async queue-based warm sandbox pool with health checking."""

    def __init__(
        self,
        app: modal.App,
        image: modal.Image,
        pool_size: int = 3,
        max_age_seconds: int = 300,  # 5 minutes
        max_uses_per_sandbox: int = 10,
        health_check_interval: int = 60,  # 1 minute
    ):
        self.app = app
        self.image = image
        self.pool_size = pool_size
        self.max_age_seconds = max_age_seconds
        self.max_uses_per_sandbox = max_uses_per_sandbox
        self.health_check_interval = health_check_interval

        # Queue to hold available sandboxes
        self._sandbox_queue: asyncio.Queue[PooledSandbox] = asyncio.Queue(maxsize=pool_size)

        # Background tasks
        self._warmup_task: Optional[asyncio.Task] = None
        self._health_check_task: Optional[asyncio.Task] = None
        self._cleanup_task: Optional[asyncio.Task] = None

        # Pool statistics
        self._stats = {
            "created": 0,
            "reused": 0,
            "recycled": 0,
            "health_checks": 0,
            "failures": 0
        }

        # Health tracking for better error recovery
        self._consecutive_failures = 0
        self._last_successful_creation = time.time()
        self._pool_reset_threshold = 5  # Reset pool after 5 consecutive failures

        self._running = False

    async def start(self):
        """Start the pool and background tasks."""
        if self._running:
            return

        self._running = True
        logger.info(f"Starting warm sandbox pool with {self.pool_size} sandboxes")

        # Start background tasks
        self._warmup_task = asyncio.create_task(self._warmup_pool())
        self._health_check_task = asyncio.create_task(self._health_check_loop())
        self._cleanup_task = asyncio.create_task(self._cleanup_loop())

        # Wait for initial warmup
        await asyncio.sleep(1)  # Give warmup a moment to start

    async def stop(self):
        """Stop the pool and cleanup resources."""
        if not self._running:
            return

        self._running = False
        logger.info("Stopping warm sandbox pool")

        # Cancel background tasks
        for task in [self._warmup_task, self._health_check_task, self._cleanup_task]:
            if task and not task.done():
                task.cancel()
                try:
                    await task
                except asyncio.CancelledError:
                    pass
        # Cleanup remaining sandboxes
        while not self._sandbox_queue.empty():
            try:
                pooled_sb = self._sandbox_queue.get_nowait()
                await self._terminate_sandbox(pooled_sb.sandbox)
            except asyncio.QueueEmpty:
                break
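A sketch of the intended lifecycle, using the get_sandbox context manager defined just below (my_app and my_image stand in for the caller's Modal objects and are not defined in this repo):

async def run_snippet(pool: WarmSandboxPool, code: str) -> str:
    # Borrow a sandbox; on exit it is recycled to the pool or terminated.
    async with pool.get_sandbox(timeout=5.0) as sb:
        proc = sb.exec("python", "-c", code)
        return proc.stdout.read()

# pool = WarmSandboxPool(app=my_app, image=my_image, pool_size=3)
# await pool.start(); print(await run_snippet(pool, "print(2 + 2)")); await pool.stop()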
    @asynccontextmanager
    async def get_sandbox(self, timeout: float = 5.0):
        """Yield a live sandbox from the pool, creating one on demand if the pool is empty."""
        pooled_sb = None
        created_new = False
        try:
            # Check if we need to reset the pool due to consecutive failures
            if self._consecutive_failures >= self._pool_reset_threshold:
                logger.warning(f"Pool has {self._consecutive_failures} consecutive failures, attempting reset")
                await self._emergency_pool_reset()

            # Try to get a warm sandbox from the pool, retry if not alive
            max_retries = 3  # Increased retries for better reliability
            for attempt in range(max_retries):
                try:
                    # Try to get from pool first
                    pooled_sb = await asyncio.wait_for(self._sandbox_queue.get(), timeout=timeout)
                    # Check if the sandbox is alive
                    alive = await self._is_sandbox_alive(pooled_sb.sandbox)
                    if not alive:
                        logger.info(f"Got dead sandbox from pool on attempt {attempt + 1}, terminating and trying next.")
                        await self._terminate_sandbox(pooled_sb.sandbox)
                        pooled_sb = None
                        continue  # Try again

                    # Sandbox is alive, use it
                    pooled_sb.last_used = time.time()
                    pooled_sb.use_count += 1
                    self._stats["reused"] += 1
                    self._consecutive_failures = 0  # Reset failure counter on success
                    break

                except asyncio.TimeoutError:
                    # Pool empty or taking too long, create a new one
                    logger.info(f"Pool timeout on attempt {attempt + 1}, creating new sandbox")
                    try:
                        sandbox = await self._create_sandbox()
                        pooled_sb = PooledSandbox(
                            sandbox=sandbox,
                            created_at=time.time(),
                            last_used=time.time(),
                            use_count=1
                        )
                        created_new = True
                        self._stats["created"] += 1
                        self._consecutive_failures = 0  # Reset failure counter on success
                        self._last_successful_creation = time.time()
                        break
                    except Exception as create_error:
                        logger.error(f"Failed to create sandbox on attempt {attempt + 1}: {create_error}")
                        self._consecutive_failures += 1
                        if attempt == max_retries - 1:  # Last attempt
                            raise CodeExecutionError(f"Failed to create sandbox after {max_retries} attempts: {create_error}")
                        await asyncio.sleep(2 ** attempt)  # Exponential backoff
            else:
                # for-else: the loop exhausted all retries without a break
                self._consecutive_failures += 1
                raise CodeExecutionError("Could not obtain a live sandbox from the pool after all retry attempts.")

            logger.info(f"Yielding sandbox from sandbox_pool (type: {type(pooled_sb.sandbox)})")
            yield pooled_sb.sandbox

        except Exception as e:
            logger.error(f"Error getting sandbox: {e}")
            self._stats["failures"] += 1
            self._consecutive_failures += 1
            raise CodeExecutionError(f"Failed to get sandbox: {e}")
        finally:
            if pooled_sb:
                should_recycle = (
                    not created_new and
                    self._should_recycle_sandbox(pooled_sb) and
                    self._running
                )
                if should_recycle:
                    # Double-check sandbox is alive and functional before returning to pool
                    if await self._is_sandbox_alive(pooled_sb.sandbox):
                        # Additional check: try a quick execution to ensure sandbox is fully functional
                        try:
                            await asyncio.wait_for(
                                asyncio.get_event_loop().run_in_executor(
                                    None,
                                    lambda: pooled_sb.sandbox.exec("python", "-c", "import sys; print('ready')", timeout=2)
                                ),
                                timeout=3.0
                            )

                            # Sandbox is healthy and functional - return to pool
                            try:
                                self._sandbox_queue.put_nowait(pooled_sb)
                                logger.debug("Returned healthy sandbox to pool")
                            except asyncio.QueueFull:
                                # Pool is full - terminate excess sandbox
                                await self._terminate_sandbox(pooled_sb.sandbox)
                                logger.debug("Pool full, terminated excess sandbox")
                        except Exception as e:
                            # Sandbox failed functional test - terminate it
                            logger.debug(f"Sandbox failed functional test, terminating: {e}")
                            await self._terminate_sandbox(pooled_sb.sandbox)
                    else:
                        # Sandbox is dead - terminate it
                        logger.debug("Sandbox is dead, terminating instead of recycling")
                        await self._terminate_sandbox(pooled_sb.sandbox)
                else:
                    # Should not recycle - terminate sandbox
                    await self._terminate_sandbox(pooled_sb.sandbox)
                    if not created_new:
                        self._stats["recycled"] += 1
                    logger.debug("Terminated sandbox (exceeded recycle criteria)")

    async def _create_sandbox(self) -> modal.Sandbox:
        """Create a new Modal sandbox with timeout protection."""
        try:
            # Add timeout protection for sandbox creation
            sandbox_creation = asyncio.get_event_loop().run_in_executor(
                None,
                lambda: modal.Sandbox.create(
                    app=self.app,
                    image=self.image,
                    cpu=2.0,
                    memory=1024,
                    timeout=35
                )
            )
            # Wait for sandbox creation with timeout
            sandbox = await asyncio.wait_for(sandbox_creation, timeout=120)  # 2 minute timeout
            logger.debug(f"Created new sandbox of type: {type(sandbox)}")
            return sandbox
        except asyncio.TimeoutError:
            logger.error("Sandbox creation timed out after 2 minutes")
            raise Exception("Sandbox creation timed out - Modal may be experiencing issues")
        except Exception as e:
            logger.error(f"Failed to create sandbox: {e}")
            raise

    async def _terminate_sandbox(self, sandbox: modal.Sandbox):
        """Safely terminate a sandbox with better error handling."""
        try:
            # Check if sandbox is still responsive before termination
            if hasattr(sandbox, '_terminated') and sandbox._terminated:
                logger.debug("Sandbox already terminated")
                return

            # Use asyncio timeout for termination
            await asyncio.wait_for(
                asyncio.get_event_loop().run_in_executor(None, sandbox.terminate),
                timeout=10.0  # 10 second timeout for termination
            )
            logger.debug("Terminated sandbox successfully")
        except asyncio.TimeoutError:
            logger.warning("Sandbox termination timed out - may be unresponsive")
        except Exception as e:
            # Log the error but don't fail - sandbox may already be dead
            logger.warning(f"Failed to terminate sandbox (may already be dead): {e}")
            # Mark sandbox as terminated to avoid repeated attempts
            if hasattr(sandbox, '_terminated'):
                sandbox._terminated = True

    def _should_recycle_sandbox(self, pooled_sb: PooledSandbox) -> bool:
        """Determine if a sandbox should be recycled back to the pool."""
        now = time.time()

        # Check age
        if now - pooled_sb.created_at > self.max_age_seconds:
            logger.debug("Sandbox too old, not recycling")
            return False

        # Check usage count
        if pooled_sb.use_count >= self.max_uses_per_sandbox:
            logger.debug("Sandbox used too many times, not recycling")
            return False

        # Check health (if we've checked it)
        if pooled_sb.health == SandboxHealth.UNHEALTHY:
            logger.debug("Sandbox unhealthy, not recycling")
            return False

        return True

    async def _warmup_pool(self):
        """Background task to maintain warm sandboxes in the pool with aggressive replenishment."""
        while self._running:
            try:
                current_size = self._sandbox_queue.qsize()

                # More aggressive warmup - start warming when below 90% capacity
                warmup_threshold = max(1, int(self.pool_size * 0.9))

                if current_size < warmup_threshold:
                    needed = self.pool_size - current_size
                    logger.info(f"Pool size ({current_size}) below threshold ({warmup_threshold}). Warming {needed} sandboxes...")

                    # Create new sandboxes to fill the pool - but limit concurrent creation
                    max_concurrent = min(needed, 2)  # Don't overwhelm Modal
                    tasks = []
                    for _ in range(max_concurrent):
                        task = asyncio.create_task(self._create_and_queue_sandbox())
                        tasks.append(task)

                    if tasks:
                        results = await asyncio.gather(*tasks, return_exceptions=True)
                        # Log any failures
                        successful = 0
                        for i, result in enumerate(results):
                            if isinstance(result, Exception):
                                logger.warning(f"Failed to create sandbox {i+1}/{max_concurrent}: {result}")
                            else:
                                successful += 1

                        if successful > 0:
                            logger.info(f"Successfully warmed {successful}/{max_concurrent} sandboxes")

                # Adaptive sleep interval based on pool health
                if current_size == 0:
                    # Critical: no sandboxes available
                    sleep_interval = 1
                elif current_size < warmup_threshold:
                    # Low: need more sandboxes
                    sleep_interval = 2
                else:
                    # Healthy: normal monitoring
                    sleep_interval = 5

                await asyncio.sleep(sleep_interval)

            except Exception as e:
                logger.error(f"Error in warmup loop: {e}")
                await asyncio.sleep(10)  # Wait longer on error

    async def _create_and_queue_sandbox(self):
        """Create a sandbox and add it to the queue."""
        start_time = time.time()
        try:
            # Create the sandbox
            sandbox = await self._create_sandbox()
            creation_time = time.time() - start_time
            logger.info(f"Sandbox creation took {creation_time:.2f}s")

            # Proactively warm up the sandbox with core imports
            warmup_start = time.time()
            await self._warmup_sandbox_imports(sandbox)
            warmup_time = time.time() - warmup_start
            logger.info(f"Sandbox warmup with imports took {warmup_time:.2f}s")

            pooled_sb = PooledSandbox(
                sandbox=sandbox,
                created_at=time.time(),
                last_used=time.time()
            )

            try:
                self._sandbox_queue.put_nowait(pooled_sb)
                total_time = time.time() - start_time
                logger.info(f"Added warm sandbox to pool (total time: {total_time:.2f}s)")
            except asyncio.QueueFull:
                # Pool is full, terminate this sandbox
                await self._terminate_sandbox(sandbox)

        except Exception as e:
            total_time = time.time() - start_time
            logger.error(f"Failed to create and queue sandbox after {total_time:.2f}s: {e}")

    async def _warmup_sandbox_imports(self, sandbox: modal.Sandbox):
        """Warm up sandbox by importing core packages."""
        try:
            from mcp_hub.package_utils import get_warmup_import_commands

            # Get warmup commands
            import_commands = get_warmup_import_commands()
            warmup_script = "; ".join(import_commands)

            # Execute the warmup script
            logger.debug("Running sandbox warmup imports...")
            proc = await asyncio.get_event_loop().run_in_executor(
                None,
                lambda: sandbox.exec("python", "-c", warmup_script, timeout=30)
            )

            # Check if warmup was successful
            if hasattr(proc, 'stdout') and hasattr(proc.stdout, 'read'):
                output = proc.stdout.read()
                if "Core packages warmed up successfully" in output:
                    logger.debug("Sandbox warmup imports completed successfully")
                else:
                    logger.warning(f"Sandbox warmup completed but output unexpected: {output}")
            else:
                logger.debug("Sandbox warmup imports completed")

        except Exception as e:
            logger.warning(f"Failed to warm up sandbox imports (sandbox still usable): {e}")

    async def _health_check_loop(self):
        """Background task to check sandbox health and perform proactive cleanup."""
        while self._running:
            try:
                # Perform regular health checks every interval
                await asyncio.sleep(self.health_check_interval)

                # First do a quick proactive cleanup
                cleaned = await self._proactive_cleanup()

                # Then do the full health check
                await self._perform_health_checks()

                # If we cleaned up sandboxes, trigger warmup
                if cleaned > 0:
                    logger.info(f"Health check cleaned {cleaned} sandboxes, pool may need warming")

            except Exception as e:
                logger.error(f"Error in health check loop: {e}")
                await asyncio.sleep(10)  # Wait longer on error

    async def _perform_health_checks(self):
        """Perform health checks on sandboxes in the pool."""
        # This is a simplified health check - in practice you might want
        # to run a simple command to verify the sandbox is responsive
        temp_sandboxes = []

        # Drain the queue to check each sandbox
        while not self._sandbox_queue.empty():
            try:
                pooled_sb = self._sandbox_queue.get_nowait()
                is_healthy = await self._check_sandbox_health(pooled_sb.sandbox)
                pooled_sb.health = SandboxHealth.HEALTHY if is_healthy else SandboxHealth.UNHEALTHY
                if is_healthy:
                    temp_sandboxes.append(pooled_sb)
                else:
                    # Terminate unhealthy sandbox
                    await self._terminate_sandbox(pooled_sb.sandbox)
                    self._stats["recycled"] += 1
            except asyncio.QueueEmpty:
                break

        # Put healthy sandboxes back
        for pooled_sb in temp_sandboxes:
            try:
                self._sandbox_queue.put_nowait(pooled_sb)
            except asyncio.QueueFull:
                await self._terminate_sandbox(pooled_sb.sandbox)

        self._stats["health_checks"] += 1
        logger.debug(f"Health check completed. Pool size: {self._sandbox_queue.qsize()}")

    async def _check_sandbox_health(self, sandbox: modal.Sandbox) -> bool:
        """Check if a sandbox is healthy."""
        try:
            # Run a simple Python command to check if the sandbox is responsive
            proc = await asyncio.get_event_loop().run_in_executor(
                None,
                lambda: sandbox.exec("python", "-c", "print('health_check')", timeout=5)
            )
            output = proc.stdout.read()
            return "health_check" in output
        except Exception as e:
            logger.debug(f"Sandbox health check failed: {e}")
            return False

    async def _cleanup_loop(self):
        """Background task to cleanup old sandboxes."""
        while self._running:
            try:
                await asyncio.sleep(30)  # Check every 30 seconds
                await self._cleanup_old_sandboxes()
            except Exception as e:
                logger.error(f"Error in cleanup loop: {e}")

    async def _cleanup_old_sandboxes(self):
        """Remove old sandboxes from the pool."""
        now = time.time()
        temp_sandboxes = []

        while not self._sandbox_queue.empty():
            try:
                pooled_sb = self._sandbox_queue.get_nowait()
                if now - pooled_sb.created_at < self.max_age_seconds:
                    temp_sandboxes.append(pooled_sb)
                else:
                    # Terminate expired sandbox
                    await self._terminate_sandbox(pooled_sb.sandbox)
                    self._stats["recycled"] += 1
                    logger.debug("Cleaned up old sandbox")
            except asyncio.QueueEmpty:
                break

        # Put non-expired sandboxes back
        for pooled_sb in temp_sandboxes:
            try:
                self._sandbox_queue.put_nowait(pooled_sb)
            except asyncio.QueueFull:
                await self._terminate_sandbox(pooled_sb.sandbox)

    async def _is_sandbox_alive(self, sandbox: modal.Sandbox) -> bool:
        """Check if a sandbox is alive by running a trivial command with better error handling."""
        try:
            # Check if sandbox was already marked as terminated
            if hasattr(sandbox, '_terminated') and sandbox._terminated:
                return False

            # Use a shorter timeout for liveness checks
            proc = await asyncio.wait_for(
                asyncio.get_event_loop().run_in_executor(
                    None,
                    lambda: sandbox.exec("python", "-c", "print('ping')", timeout=3)
                ),
                timeout=5.0  # Overall timeout
            )

            if hasattr(proc, "stdout") and hasattr(proc.stdout, "read"):
                out = proc.stdout.read()
                return "ping" in out
            else:
                # For some Modal versions, output might be returned directly
                out = str(proc)
                return "ping" in out

        except asyncio.TimeoutError:
            logger.debug("Liveness check timed out - sandbox likely dead")
            return False
        except Exception as e:
            logger.debug(f"Liveness check failed: {e}")
            # Mark sandbox as dead to avoid repeated checks
            if hasattr(sandbox, '_terminated'):
                sandbox._terminated = True
            return False

    async def _emergency_pool_reset(self):
        """Emergency reset of the pool when too many consecutive failures occur."""
        logger.warning("Performing emergency pool reset due to consecutive failures")

        # Drain and terminate all sandboxes in the pool
        terminated_count = 0
        while not self._sandbox_queue.empty():
            try:
                pooled_sb = self._sandbox_queue.get_nowait()
                await self._terminate_sandbox(pooled_sb.sandbox)
                terminated_count += 1
            except asyncio.QueueEmpty:
                break

        logger.info(f"Emergency reset: terminated {terminated_count} sandboxes")

        # Reset failure counter
        self._consecutive_failures = 0

        # Try to create one fresh sandbox to test if the underlying issue is resolved
        try:
            test_sandbox = await self._create_sandbox()
            test_pooled = PooledSandbox(
                sandbox=test_sandbox,
                created_at=time.time(),
                last_used=time.time(),
                use_count=0
            )
            self._sandbox_queue.put_nowait(test_pooled)
            logger.info("Emergency reset successful: created test sandbox")
        except Exception as e:
            logger.error(f"Emergency reset failed to create test sandbox: {e}")
            # Still reset the counter to allow retries

    def get_stats(self) -> Dict[str, Any]:
        """Get pool statistics including health metrics."""
        return {
            **self._stats,
            "pool_size": self._sandbox_queue.qsize(),
            "target_pool_size": self.pool_size,
            "running": self._running,
            "consecutive_failures": self._consecutive_failures,
            "last_successful_creation": self._last_successful_creation,
            "time_since_last_success": time.time() - self._last_successful_creation,
            "health_status": "healthy" if self._consecutive_failures < 3 else "degraded" if self._consecutive_failures < self._pool_reset_threshold else "critical"
        }

    async def _proactive_cleanup(self):
        """Proactively clean up dead or unhealthy sandboxes from the pool."""
        temp_sandboxes = []
        cleaned_count = 0

        # Drain the queue to check each sandbox
        while not self._sandbox_queue.empty():
            try:
                pooled_sb = self._sandbox_queue.get_nowait()

                # Quick health check
                if await self._is_sandbox_alive(pooled_sb.sandbox):
                    # Sandbox is alive - keep it
                    temp_sandboxes.append(pooled_sb)
|
601 |
+
else:
|
602 |
+
# Sandbox is dead - terminate it
|
603 |
+
await self._terminate_sandbox(pooled_sb.sandbox)
|
604 |
+
cleaned_count += 1
|
605 |
+
logger.debug("Cleaned up dead sandbox during proactive cleanup")
|
606 |
+
|
607 |
+
except asyncio.QueueEmpty:
|
608 |
+
break
|
609 |
+
|
610 |
+
# Put healthy sandboxes back
|
611 |
+
for pooled_sb in temp_sandboxes:
|
612 |
+
try:
|
613 |
+
self._sandbox_queue.put_nowait(pooled_sb)
|
614 |
+
except asyncio.QueueFull:
|
615 |
+
# Shouldn't happen, but terminate if it does
|
616 |
+
await self._terminate_sandbox(pooled_sb.sandbox)
|
617 |
+
cleaned_count += 1
|
618 |
+
|
619 |
+
if cleaned_count > 0:
|
620 |
+
logger.info(f"Proactive cleanup removed {cleaned_count} dead sandboxes")
|
621 |
+
|
622 |
+
return cleaned_count
|
623 |
+
|
624 |
+
# Helper function for testing and debugging the sandbox pool
|
625 |
+
async def test_sandbox_pool_health(pool: WarmSandboxPool) -> Dict[str, Any]:
|
626 |
+
"""Test sandbox pool health and return detailed diagnostics."""
|
627 |
+
diagnostics: Dict[str, Any] = {
|
628 |
+
"timestamp": time.time(),
|
629 |
+
"pool_stats": pool.get_stats(),
|
630 |
+
"tests": {}
|
631 |
+
}
|
632 |
+
|
633 |
+
logger.info("Starting sandbox pool health test...")
|
634 |
+
|
635 |
+
# Test 1: Pool basic stats
|
636 |
+
stats = pool.get_stats()
|
637 |
+
diagnostics["tests"]["pool_stats"] = {
|
638 |
+
"passed": True,
|
639 |
+
"details": stats
|
640 |
+
}
|
641 |
+
|
642 |
+
# Test 2: Try to get a sandbox
|
643 |
+
try:
|
644 |
+
async with pool.get_sandbox(timeout=10.0) as sandbox:
|
645 |
+
# Test 3: Try to run a simple command
|
646 |
+
try:
|
647 |
+
proc = await asyncio.get_event_loop().run_in_executor(
|
648 |
+
None,
|
649 |
+
lambda: sandbox.exec("python", "-c", "print('health_test_ok')", timeout=5)
|
650 |
+
)
|
651 |
+
output = proc.stdout.read() if hasattr(proc.stdout, "read") else str(proc)
|
652 |
+
|
653 |
+
diagnostics["tests"]["sandbox_execution"] = {
|
654 |
+
"passed": "health_test_ok" in output,
|
655 |
+
"output": output[:200], # First 200 chars
|
656 |
+
"details": "Successfully executed test command"
|
657 |
+
}
|
658 |
+
except Exception as e:
|
659 |
+
diagnostics["tests"]["sandbox_execution"] = {
|
660 |
+
"passed": False,
|
661 |
+
"error": str(e),
|
662 |
+
"details": "Failed to execute test command in sandbox"
|
663 |
+
}
|
664 |
+
|
665 |
+
diagnostics["tests"]["sandbox_acquisition"] = {
|
666 |
+
"passed": True,
|
667 |
+
"details": "Successfully acquired and released sandbox"
|
668 |
+
}
|
669 |
+
|
670 |
+
except Exception as e:
|
671 |
+
diagnostics["tests"]["sandbox_acquisition"] = {
|
672 |
+
"passed": False,
|
673 |
+
"error": str(e),
|
674 |
+
"details": "Failed to acquire sandbox from pool"
|
675 |
+
}
|
676 |
+
|
677 |
+
diagnostics["tests"]["sandbox_execution"] = {
|
678 |
+
"passed": False,
|
679 |
+
"error": "Could not test - no sandbox available",
|
680 |
+
"details": "Skipped due to sandbox acquisition failure"
|
681 |
+
}
|
682 |
+
|
683 |
+
# Test 4: Check pool warmup status
|
684 |
+
if pool._running:
|
685 |
+
warmup_needed = pool.pool_size - stats["pool_size"]
|
686 |
+
diagnostics["tests"]["pool_warmup"] = {
|
687 |
+
"passed": warmup_needed <= 1, # Allow 1 sandbox to be missing
|
688 |
+
"details": f"Pool has {stats['pool_size']}/{pool.pool_size} sandboxes, {warmup_needed} needed"
|
689 |
+
}
|
690 |
+
else:
|
691 |
+
diagnostics["tests"]["pool_warmup"] = {
|
692 |
+
"passed": False,
|
693 |
+
"details": "Pool is not running"
|
694 |
+
}
|
695 |
+
|
696 |
+
# Overall health assessment
|
697 |
+
all_tests_passed = all(test.get("passed", False) for test in diagnostics["tests"].values())
|
698 |
+
diagnostics["overall_health"] = "healthy" if all_tests_passed else "unhealthy"
|
699 |
+
|
700 |
+
logger.info(f"Sandbox pool health test completed. Overall health: {diagnostics['overall_health']}")
|
701 |
+
return diagnostics
|
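The diagnostics helper above is meant to be driven from an asyncio entry point. A minimal sketch follows; the `WarmSandboxPool` constructor arguments and the `start()`/`stop()` lifecycle methods are assumptions inferred from the attributes this hunk reads (`pool_size`, `max_age_seconds`, `_running`), not signatures confirmed by the upload:

# Hypothetical driver for test_sandbox_pool_health; the constructor and
# start()/stop() names are assumed - adjust to the actual WarmSandboxPool API.
import asyncio

from mcp_hub.sandbox_pool import WarmSandboxPool, test_sandbox_pool_health

async def main() -> None:
    pool = WarmSandboxPool(pool_size=2, max_age_seconds=300)  # assumed signature
    await pool.start()  # assumed lifecycle method that launches the background loops
    try:
        diagnostics = await test_sandbox_pool_health(pool)
        print("overall:", diagnostics["overall_health"])
        for name, result in diagnostics["tests"].items():
            print(f"  {name}: {'PASS' if result.get('passed') else 'FAIL'}")
    finally:
        await pool.stop()  # assumed counterpart to start()

if __name__ == "__main__":
    asyncio.run(main())

Because `get_stats()` also exposes `consecutive_failures` and a derived `health_status`, the same loop can double as a liveness probe for a monitoring endpoint.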
mcp_hub/utils.py
ADDED
@@ -0,0 +1,439 @@
"""Utility functions for the MCP Hub project."""

import json
import re
from typing import Dict, Any, List, Optional, Union
from openai import OpenAI, AsyncOpenAI
from .config import api_config, model_config
from .exceptions import APIError, ValidationError
from .logging_config import logger
import aiohttp
from huggingface_hub import InferenceClient


def create_nebius_client() -> OpenAI:
    """Create and return a Nebius OpenAI client."""
    return OpenAI(
        base_url=api_config.nebius_base_url,
        api_key=api_config.nebius_api_key,
    )

def create_async_nebius_client() -> AsyncOpenAI:
    """Create and return an async Nebius OpenAI client."""
    return AsyncOpenAI(
        base_url=api_config.nebius_base_url,
        api_key=api_config.nebius_api_key,
    )

def create_llm_client() -> Union[OpenAI, object]:
    """Create and return an LLM client based on the configured provider."""
    if api_config.llm_provider == "nebius":
        return create_nebius_client()
    elif api_config.llm_provider == "openai":
        return OpenAI(api_key=api_config.openai_api_key)
    elif api_config.llm_provider == "anthropic":
        try:
            import anthropic
            return anthropic.Anthropic(api_key=api_config.anthropic_api_key)
        except ImportError:
            raise APIError("Anthropic", "anthropic package not installed. Install with: pip install anthropic")
    elif api_config.llm_provider == "huggingface":
        # Try different HuggingFace client configurations for better compatibility
        try:
            # First, try the hf-inference provider (the most recent approach)
            return InferenceClient(
                provider="hf-inference",
                api_key=api_config.huggingface_api_key,
            )
        except Exception:
            # Fall back to token-based authentication
            return InferenceClient(
                token=api_config.huggingface_api_key,
            )
    else:
        raise APIError("Config", f"Unsupported LLM provider: {api_config.llm_provider}")

def create_async_llm_client() -> Union[AsyncOpenAI, object]:
    """Create and return an async LLM client based on the configured provider."""
    if api_config.llm_provider == "nebius":
        return create_async_nebius_client()
    elif api_config.llm_provider == "openai":
        return AsyncOpenAI(api_key=api_config.openai_api_key)
    elif api_config.llm_provider == "anthropic":
        try:
            import anthropic
            return anthropic.AsyncAnthropic(api_key=api_config.anthropic_api_key)
        except ImportError:
            raise APIError("Anthropic", "anthropic package not installed. Install with: pip install anthropic")
    elif api_config.llm_provider == "huggingface":
        # Try different HuggingFace client configurations for better compatibility
        try:
            # First, try the hf-inference provider (the most recent approach)
            return InferenceClient(
                provider="hf-inference",
                api_key=api_config.huggingface_api_key,
            )
        except Exception:
            # Fall back to token-based authentication
            return InferenceClient(
                token=api_config.huggingface_api_key,
            )
    else:
        raise APIError("Config", f"Unsupported LLM provider: {api_config.llm_provider}")

def validate_non_empty_string(value: str, field_name: str) -> None:
    """Validate that a string is not empty or None."""
    if not value or not value.strip():
        raise ValidationError(f"{field_name} cannot be empty.")

def extract_json_from_text(text: str) -> Dict[str, Any]:
    """Extract a JSON object from text that may contain markdown fences."""
    # Remove markdown code fences if present
    if text.startswith("```"):
        parts = text.split("```")
        if len(parts) >= 3:
            text = parts[1].strip()
        else:
            text = text.strip("```").strip()

    # Find JSON object boundaries
    start_idx = text.find("{")
    end_idx = text.rfind("}")

    if start_idx == -1 or end_idx == -1 or end_idx < start_idx:
        raise ValidationError("Failed to locate JSON object in text.")

    json_candidate = text[start_idx:end_idx + 1]

    try:
        return json.loads(json_candidate)
    except json.JSONDecodeError as e:
        raise ValidationError(f"Failed to parse JSON: {str(e)}")

def extract_urls_from_text(text: str) -> List[str]:
    """Extract URLs from text using a regex."""
    url_pattern = r"(https?://[^\s]+)"
    return re.findall(url_pattern, text)

def make_nebius_completion(
    model: str,
    messages: List[Dict[str, str]],
    temperature: float = 0.6,
    response_format: Optional[Dict[str, Any]] = None,
) -> str:
    """Make a completion request to Nebius and return the content."""
    client = create_nebius_client()

    try:
        kwargs = {
            "model": model,
            "messages": messages,
            "temperature": temperature,
        }

        if response_format:
            kwargs["response_format"] = response_format

        completion = client.chat.completions.create(**kwargs)
        return completion.choices[0].message.content.strip()
    except Exception as e:
        raise APIError("Nebius", str(e))

async def make_async_nebius_completion(
    model: str,
    messages: List[Dict[str, Any]],
    temperature: float = 0.0,
    response_format: Optional[Dict[str, Any]] = None,
) -> str:
    """Make an async completion request to the Nebius API."""
    try:
        client = create_async_nebius_client()

        kwargs = {
            "model": model,
            "messages": messages,
            "temperature": temperature,
        }

        if response_format:
            kwargs["response_format"] = response_format

        response = await client.chat.completions.create(**kwargs)

        if not response.choices:
            raise APIError("Nebius", "No completion choices returned")

        content = response.choices[0].message.content
        if content is None:
            raise APIError("Nebius", "Empty response content")

        return content.strip()

    except Exception as e:
        if isinstance(e, APIError):
            raise
        raise APIError("Nebius", f"API call failed: {str(e)}")

def make_llm_completion(
    model: str,
    messages: List[Dict[str, str]],
    temperature: float = 0.6,
    response_format: Optional[Dict[str, Any]] = None,
) -> str:
    """Make a completion request using the configured LLM provider."""
    provider = api_config.llm_provider

    try:
        if provider == "nebius":
            return make_nebius_completion(model, messages, temperature, response_format)

        elif provider == "openai":
            client = create_llm_client()
            kwargs = {
                "model": model,
                "messages": messages,
                "temperature": temperature,
            }
            # OpenAI only supports the simple response_format, not the extended Nebius format
            if response_format and response_format.get("type") == "json_object":
                kwargs["response_format"] = {"type": "json_object"}
            completion = client.chat.completions.create(**kwargs)
            return completion.choices[0].message.content.strip()

        elif provider == "anthropic":
            client = create_llm_client()
            # Convert OpenAI format to Anthropic format
            anthropic_messages = []
            system_message = None

            for msg in messages:
                if msg["role"] == "system":
                    system_message = msg["content"]
                else:
                    anthropic_messages.append({
                        "role": msg["role"],
                        "content": msg["content"],
                    })

            kwargs = {
                "model": model,
                "messages": anthropic_messages,
                "temperature": temperature,
                "max_tokens": 1000,
            }
            if system_message:
                kwargs["system"] = system_message

            response = client.messages.create(**kwargs)
            return response.content[0].text.strip()

        elif provider == "huggingface":
            # Try HuggingFace with a fallback to Nebius
            hf_error = None
            try:
                client = create_llm_client()

                # Try multiple HuggingFace API approaches

                # Method 1: chat.completions.create (OpenAI-compatible)
                try:
                    response = client.chat.completions.create(
                        model=model,
                        messages=messages,
                        temperature=temperature,
                        max_tokens=1000,
                    )

                    # Extract the response content
                    if hasattr(response, 'choices') and response.choices:
                        return response.choices[0].message.content.strip()
                    else:
                        return str(response).strip()

                except Exception as e1:
                    hf_error = e1

                    # Method 2: chat_completion (HuggingFace native)
                    try:
                        response = client.chat_completion(
                            messages=messages,
                            model=model,
                            temperature=temperature,
                            max_tokens=1000,
                        )

                        # Handle different response formats
                        if hasattr(response, 'generated_text'):
                            return response.generated_text.strip()
                        elif isinstance(response, dict) and 'generated_text' in response:
                            return response['generated_text'].strip()
                        elif isinstance(response, list) and len(response) > 0:
                            if isinstance(response[0], dict) and 'generated_text' in response[0]:
                                return response[0]['generated_text'].strip()

                        return str(response).strip()

                    except Exception as e2:
                        # Both HuggingFace methods failed
                        hf_error = f"Method 1: {str(e1)}. Method 2: {str(e2)}"
                        raise APIError("HuggingFace", f"All HuggingFace methods failed. {hf_error}")

            except Exception as e:
                # HuggingFace failed; try the fallback to Nebius
                if hf_error is None:
                    hf_error = str(e)
                logger.warning(f"HuggingFace API failed: {hf_error}, falling back to Nebius")

                try:
                    # Use a Nebius model appropriate for the task
                    nebius_model = model_config.get_model_for_provider("question_enhancer", "nebius")
                    return make_nebius_completion(nebius_model, messages, temperature, response_format)
                except Exception as nebius_error:
                    raise APIError("HuggingFace", f"HuggingFace failed: {hf_error}. Nebius fallback also failed: {str(nebius_error)}")

        else:
            raise APIError("Config", f"Unsupported LLM provider: {provider}")

    except Exception as e:
        raise APIError(provider.title(), f"Completion failed: {str(e)}")


async def make_async_llm_completion(
    model: str,
    messages: List[Dict[str, Any]],
    temperature: float = 0.0,
    response_format: Optional[Dict[str, Any]] = None,
) -> str:
    """Make an async completion request using the configured LLM provider."""
    provider = api_config.llm_provider

    try:
        if provider == "nebius":
            return await make_async_nebius_completion(model, messages, temperature, response_format)

        elif provider == "openai":
            client = create_async_llm_client()
            kwargs = {
                "model": model,
                "messages": messages,
                "temperature": temperature,
            }
            if response_format and response_format.get("type") == "json_object":
                kwargs["response_format"] = {"type": "json_object"}

            response = await client.chat.completions.create(**kwargs)

            if not response.choices:
                raise APIError("OpenAI", "No completion choices returned")

            content = response.choices[0].message.content
            if content is None:
                raise APIError("OpenAI", "Empty response content")

            return content.strip()

        elif provider == "anthropic":
            client = create_async_llm_client()
            anthropic_messages = []
            system_message = None

            for msg in messages:
                if msg["role"] == "system":
                    system_message = msg["content"]
                else:
                    anthropic_messages.append({
                        "role": msg["role"],
                        "content": msg["content"],
                    })

            kwargs = {
                "model": model,
                "messages": anthropic_messages,
                "temperature": temperature,
                "max_tokens": 1000,
            }
            if system_message:
                kwargs["system"] = system_message

            response = await client.messages.create(**kwargs)
            return response.content[0].text.strip()

        elif provider == "huggingface":
            # This HuggingFace client doesn't support async; fall back to Nebius
            logger.warning("HuggingFace does not support async operations, falling back to Nebius")

            try:
                # Use a Nebius model appropriate for the task
                nebius_model = model_config.get_model_for_provider("question_enhancer", "nebius")
                return await make_async_nebius_completion(nebius_model, messages, temperature, response_format)
            except Exception as nebius_error:
                raise APIError("HuggingFace", f"HuggingFace async not supported. Nebius fallback failed: {str(nebius_error)}")

        else:
            raise APIError("Config", f"Unsupported LLM provider: {provider}")

    except Exception as e:
        raise APIError(provider.title(), f"Async completion failed: {str(e)}")

async def async_tavily_search(query: str, max_results: int = 3) -> Dict[str, Any]:
    """Perform an async web search using the Tavily API."""
    try:
        async with aiohttp.ClientSession() as session:
            url = "https://api.tavily.com/search"
            headers = {
                "Content-Type": "application/json"
            }
            data = {
                "api_key": api_config.tavily_api_key,
                "query": query,
                "search_depth": "basic",
                "max_results": max_results,
                "include_answer": True,
            }

            async with session.post(url, headers=headers, json=data) as response:
                if response.status != 200:
                    raise APIError("Tavily", f"HTTP {response.status}: {await response.text()}")

                result = await response.json()
                return {
                    "query": result.get("query", query),
                    "tavily_answer": result.get("answer"),
                    "results": result.get("results", []),
                    "data_source": "Tavily Search API",
                }

    except aiohttp.ClientError as e:
        raise APIError("Tavily", f"HTTP request failed: {str(e)}")
    except Exception as e:
        if isinstance(e, APIError):
            raise
        raise APIError("Tavily", f"Search failed: {str(e)}")

def format_search_results(results: List[Dict[str, Any]]) -> str:
    """Format search results into a readable string."""
    if not results:
        return "No search results found."

    snippets = []
    for idx, item in enumerate(results, 1):
        title = item.get("title", "No Title")
        url = item.get("url", "")
        content = item.get("content", "")

        snippet = f"Result {idx}:\nTitle: {title}\nURL: {url}\nSnippet: {content}\n"
        snippets.append(snippet)

    return "\n".join(snippets).strip()

def create_apa_citation(url: str, year: Optional[str] = None) -> str:
    """Create a simple APA-style citation from a URL."""
    if not year:
        year = api_config.current_year

    try:
        domain = url.split("/")[2]
        title = domain.replace("www.", "").split(".")[0].capitalize()
        return f"{title}. ({year}). Retrieved from {url}"
    except (IndexError, AttributeError):
        return f"Unknown Source. ({year}). Retrieved from {url}"
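Of the helpers above, `extract_json_from_text`, `extract_urls_from_text`, `format_search_results`, and `create_apa_citation` are pure functions, so they can be exercised without any provider keys (though importing `mcp_hub.utils` still pulls in `mcp_hub.config`, which reads keys from the environment). A short sketch of their expected behavior:

# Exercises the pure helpers; no network call or LLM provider is touched.
from mcp_hub.utils import (
    create_apa_citation,
    extract_json_from_text,
    extract_urls_from_text,
    format_search_results,
)

raw = '```json\n{"sub_questions": ["What is MCP?"]}\n```'
print(extract_json_from_text(raw))   # {'sub_questions': ['What is MCP?']}

text = "See https://modal.com and https://tavily.com for details."
print(extract_urls_from_text(text))  # ['https://modal.com', 'https://tavily.com']

results = [{"title": "Modal Docs", "url": "https://modal.com/docs", "content": "Sandboxes..."}]
print(format_search_results(results))  # "Result 1:\nTitle: Modal Docs\n..."

print(create_apa_citation("https://www.example.com/article", year="2025"))
# Example. (2025). Retrieved from https://www.example.com/article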
pyproject.toml
ADDED
@@ -0,0 +1,28 @@
[project]
name = "mcp-hub-project"
version = "0.2.0"
description = "Advanced MCP Hub with Inter-Agent Communication and Performance Monitoring"
readme = "README.md"
requires-python = ">=3.12"
dependencies = [
    "gradio-client>=1.10.2",
    "gradio[mcp]>=5.33.0",
    "modal>=1.0.2",
    "openai>=1.84.0",
    "tavily-python>=0.7.4",
    "python-dotenv>=1.0.0",
    "psutil>=5.9.0",
    "aiohttp>=3.8.0",
    "anthropic>=0.52.2",
    "huggingface>=0.0.1",
    "huggingface-hub>=0.32.4",
]

[project.optional-dependencies]
dev = [
    "pytest>=7.4.0",
    "pytest-cov>=4.1.0",
    "black>=23.0.0",
    "isort>=5.12.0",
    "mypy>=1.5.0",
]
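A quick way to confirm the table parses and to list the pinned dependencies is the stdlib `tomllib` (available since Python 3.11, so covered by the `requires-python = ">=3.12"` pin above):

# Loads pyproject.toml with the standard library; no third-party TOML parser needed.
import tomllib

with open("pyproject.toml", "rb") as f:
    project = tomllib.load(f)["project"]

print(project["name"], project["version"])
for dep in project["dependencies"]:
    print(" -", dep)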
pytest.ini
ADDED
@@ -0,0 +1,11 @@
[pytest]
minversion = 6.0
addopts = -ra --strict-markers --strict-config --cov=app --cov=mcp_hub --cov-report=term-missing --cov-report=html:htmlcov --cov-branch
testpaths = tests
markers =
    unit: Unit tests
    integration: Integration tests
    async_test: Async test cases
    slow: Slow running tests
    requires_api: Tests that need API keys
asyncio_mode = auto
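With `--strict-markers` in `addopts`, only the markers registered above are legal, so a typo like `@pytest.mark.unti` fails at collection time instead of silently deselecting tests. A hypothetical test module using them (the test bodies are illustrative, not part of the upload); selection then works via `pytest -m unit` or `pytest -m "integration and not requires_api"`:

# Hypothetical test module showing the registered markers in use.
import pytest

@pytest.mark.unit
def test_url_extraction():
    from mcp_hub.utils import extract_urls_from_text
    assert extract_urls_from_text("see https://example.com now") == ["https://example.com"]

@pytest.mark.integration
@pytest.mark.requires_api
def test_live_tavily_search():
    pytest.skip("Runs only with a real TAVILY_API_KEY configured")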
requirements.txt
ADDED
@@ -0,0 +1,11 @@
gradio-client>=1.10.2
gradio[mcp]>=5.33.0
modal>=1.0.2
openai>=1.84.0
tavily-python>=0.7.4
python-dotenv>=1.0.0
psutil>=5.9.0
aiohttp>=3.8.0
anthropic>=0.52.2
huggingface>=0.0.1
huggingface-hub>=0.32.4
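Requirement lines follow PEP 508, so a stray trailing comma (e.g. `gradio-client>=1.10.2,`) is an invalid specifier that pip rejects. A small sanity check with the `packaging` library (shipped alongside pip in most environments) catches that class of error before install time:

# Parses each non-comment line as a PEP 508 requirement; raises
# packaging.requirements.InvalidRequirement on malformed lines.
from packaging.requirements import Requirement

with open("requirements.txt") as f:
    for line in f:
        line = line.strip()
        if line and not line.startswith("#"):
            print(Requirement(line))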
tests/__init__.py
ADDED
@@ -0,0 +1 @@
"""Test package for MCP Hub."""
tests/__pycache__/__init__.cpython-312.pyc
ADDED
Binary file (199 Bytes).
tests/__pycache__/conftest.cpython-312-pytest-8.4.0.pyc
ADDED
Binary file (5.85 kB).
tests/conftest.py
ADDED
@@ -0,0 +1,142 @@
"""Common test fixtures and configuration."""

import pytest
import asyncio
import os
from unittest.mock import Mock, MagicMock, patch
from typing import Dict, Any, Generator

# Mock environment variables for testing - set them globally before any imports
TEST_ENV_VARS = {
    "TAVILY_API_KEY": "tvly-test-key-12345",
    "NEBIUS_API_KEY": "test-nebius-key",
    "OPENAI_API_KEY": "test-openai-key",
    "ANTHROPIC_API_KEY": "test-anthropic-key",
    "HUGGINGFACE_API_KEY": "test-hf-key",
    "LLM_PROVIDER": "nebius",
}

# Set the environment variables immediately
for key, value in TEST_ENV_VARS.items():
    os.environ[key] = value

@pytest.fixture
def mock_tavily_client():
    """Mock Tavily client for web search tests."""
    mock_client = Mock()
    mock_client.search.return_value = {
        "results": [
            {
                "title": "Test Result 1",
                "url": "https://example.com/1",
                "content": "Test content 1",
                "score": 0.9
            },
            {
                "title": "Test Result 2",
                "url": "https://example.com/2",
                "content": "Test content 2",
                "score": 0.8
            }
        ],
        "answer": "Test search summary"
    }
    return mock_client

@pytest.fixture
def mock_llm_response():
    """Mock LLM completion response."""
    return '{"sub_questions": ["Question 1?", "Question 2?", "Question 3?"]}'

@pytest.fixture
def mock_modal_sandbox():
    """Mock Modal sandbox for code execution tests."""
    mock_sandbox = Mock()
    mock_sandbox.exec.return_value = Mock(stdout="Test output", stderr="", returncode=0)
    return mock_sandbox

@pytest.fixture
def sample_user_request():
    """Sample user request for testing."""
    return "Create a Python script to analyze CSV data and generate charts"

@pytest.fixture
def sample_search_results():
    """Sample search results for testing."""
    return [
        {
            "title": "Python Data Analysis Tutorial",
            "url": "https://example.com/pandas-tutorial",
            "content": "Learn how to analyze CSV data with pandas and matplotlib...",
            "score": 0.95
        },
        {
            "title": "Chart Generation with Python",
            "url": "https://example.com/charts",
            "content": "Create stunning charts and visualizations...",
            "score": 0.87
        }
    ]

@pytest.fixture
def sample_code():
    """Sample Python code for testing."""
    return '''
import pandas as pd
import matplotlib.pyplot as plt

# Load data
df = pd.read_csv('data.csv')

# Generate chart
df.plot(kind='bar')
plt.show()
'''

@pytest.fixture
def mock_config():
    """Mock configuration objects."""
    api_config = Mock()
    api_config.tavily_api_key = "tvly-test-key"
    api_config.llm_provider = "nebius"
    api_config.nebius_api_key = "test-nebius-key"

    model_config = Mock()
    model_config.get_model_for_provider.return_value = "meta-llama/llama-3.1-8b-instruct"

    return api_config, model_config

@pytest.fixture
def event_loop():
    """Create an event loop for async tests."""
    loop = asyncio.new_event_loop()
    yield loop
    loop.close()

class MockAgent:
    """Base mock agent class for testing."""
    def __init__(self, name: str):
        self.name = name
        self.call_count = 0

    def __call__(self, *args, **kwargs):
        self.call_count += 1
        return {"success": True, "agent": self.name, "calls": self.call_count}

@pytest.fixture
def mock_agents():
    """Mock agent instances for orchestrator testing."""
    return {
        "question_enhancer": MockAgent("question_enhancer"),
        "web_search": MockAgent("web_search"),
        "llm_processor": MockAgent("llm_processor"),
        "citation_formatter": MockAgent("citation_formatter"),
        "code_generator": MockAgent("code_generator"),
        "code_runner": MockAgent("code_runner"),
    }

@pytest.fixture
def disable_advanced_features():
    """Disable advanced features for basic testing."""
    with patch('app.ADVANCED_FEATURES_AVAILABLE', False):
        yield
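A sketch of how these fixtures compose in a test: pytest injects them by parameter name, so a test only declares what it needs. The test names below are illustrative, not part of the upload:

# Hypothetical consumers of the fixtures defined above.
def test_mock_search_pipeline(mock_tavily_client, sample_search_results):
    response = mock_tavily_client.search("python csv charts")
    assert response["answer"] == "Test search summary"
    assert len(response["results"]) == 2
    # The canned results are ordered by relevance score, highest first
    assert sample_search_results[0]["score"] > sample_search_results[1]["score"]

def test_mock_agents_count_calls(mock_agents):
    result = mock_agents["web_search"]("any query")
    assert result == {"success": True, "agent": "web_search", "calls": 1}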
tests/integration/__init__.py
ADDED
@@ -0,0 +1 @@
"""Integration tests package."""
tests/integration/__pycache__/__init__.cpython-312.pyc
ADDED
Binary file (212 Bytes).
tests/integration/__pycache__/test_async_sync_error_handling.cpython-312-pytest-8.4.0.pyc
ADDED
Binary file (29 kB).
tests/integration/__pycache__/test_end_to_end_workflow.cpython-312-pytest-8.4.0.pyc
ADDED
Binary file (31 kB).
tests/integration/__pycache__/test_performance_resources.cpython-312-pytest-8.4.0.pyc
ADDED
Binary file (43.3 kB).
tests/integration/__pycache__/test_ui_endpoints.cpython-312-pytest-8.4.0.pyc
ADDED
Binary file (48.1 kB).
tests/unit/__init__.py
ADDED
@@ -0,0 +1 @@
"""Unit tests package."""
tests/unit/__pycache__/__init__.cpython-312.pyc
ADDED
Binary file (198 Bytes).
tests/unit/__pycache__/test_citation_formatter_agent.cpython-312-pytest-8.4.0.pyc
ADDED
Binary file (6.26 kB).
tests/unit/__pycache__/test_code_generator_agent.cpython-312-pytest-8.4.0.pyc
ADDED
Binary file (7.76 kB).
tests/unit/__pycache__/test_code_runner_agent.cpython-312-pytest-8.4.0.pyc
ADDED
Binary file (29.9 kB).
tests/unit/__pycache__/test_llm_processor_agent.cpython-312-pytest-8.4.0.pyc
ADDED
Binary file (5.85 kB).
tests/unit/__pycache__/test_orchestrator_agent.cpython-312-pytest-8.4.0.pyc
ADDED
Binary file (30.1 kB).