"""Enhanced Hybrid Agent Evaluation Runner."""

import os

import gradio as gr
import pandas as pd
import requests

from agent import HybridLangGraphAgnoSystem


DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"


class BasicAgent:
    """A hybrid LangGraph + Agno agent with performance optimization."""

    def __init__(self):
        print("BasicAgent initialized with Hybrid LangGraph + Agno System.")
        self.hybrid_system = HybridLangGraphAgnoSystem()

    def __call__(self, question: str) -> str:
        print(f"Agent received question: {question}")

        try:
            result = self.hybrid_system.process_query(question)
            answer = result.get("answer", "No response generated")

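            # The agent may prefix its reply with a "FINAL ANSWER:" marker;
            # keep only the text after that marker for submission.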
if "FINAL ANSWER:" in answer: |
|
final_answer = answer.split("FINAL ANSWER:")[-1].strip() |
|
else: |
|
final_answer = answer.strip() |
|
|
|
|
|
metrics = result.get("performance_metrics", {}) |
|
provider = result.get("provider_used", "Unknown") |
|
processing_time = metrics.get("total_time", 0) |
|
|
|
print(f"Provider used: {provider}, Processing time: {processing_time:.2f}s") |
|
|
|
return final_answer |
|
|
|
except Exception as e: |
|
print(f"Error in agent processing: {e}") |
|
return f"Error: {str(e)}" |
|
|
|
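# Gradio passes the logged-in user's profile automatically because this handler
# declares a gr.OAuthProfile parameter; the profile is None when no user is
# signed in via gr.LoginButton.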
def run_and_submit_all(profile: gr.OAuthProfile | None):
    """
    Fetches all questions, runs the Enhanced Hybrid Agent on them, submits all answers,
    and displays the results with performance metrics.
    """
    space_id = os.getenv("SPACE_ID")

    if profile:
        username = profile.username
        print(f"User logged in: {username}")
    else:
        print("User not logged in.")
        return "Please Login to Hugging Face with the button.", None

    api_url = DEFAULT_API_URL
    questions_url = f"{api_url}/questions"
    submit_url = f"{api_url}/submit"

    try:
        agent = BasicAgent()
        print("Hybrid LangGraph + Agno Agent initialized successfully")
    except Exception as e:
        print(f"Error instantiating hybrid agent: {e}")
        return f"Error initializing hybrid agent: {e}", None

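    # Link to this Space's code, included in the submission payload. When running
    # locally, SPACE_ID is unset and this URL will not resolve.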
agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main" |
|
print(f"π Agent code repository: {agent_code}") |
|
|
|
|
|
print(f"π₯ Fetching questions from: {questions_url}") |
|
try: |
|
response = requests.get(questions_url, timeout=15) |
|
response.raise_for_status() |
|
questions_data = response.json() |
|
if not questions_data: |
|
print("β Fetched questions list is empty.") |
|
return "Fetched questions list is empty or invalid format.", None |
|
print(f"β
Fetched {len(questions_data)} questions successfully.") |
|
except requests.exceptions.RequestException as e: |
|
print(f"β Error fetching questions: {e}") |
|
return f"Error fetching questions: {e}", None |
|
except Exception as e: |
|
print(f"β An unexpected error occurred fetching questions: {e}") |
|
return f"An unexpected error occurred fetching questions: {e}", None |
|
|
|
|
    results_log = []
    answers_payload = []

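    # Per-route counters: which hybrid backend handled each question, plus error
    # count and cumulative processing time for the summary printed after the run.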
    performance_stats = {
        "langgraph_math": 0,
        "agno_research": 0,
        "langgraph_retrieval": 0,
        "agno_general": 0,
        "errors": 0,
        "total_processing_time": 0,
    }

    print(f"Running Enhanced Hybrid Agent on {len(questions_data)} questions...")

    for i, item in enumerate(questions_data, 1):
        task_id = item.get("task_id")
        question_text = item.get("question")

        if not task_id or question_text is None:
            print(f"Skipping item {i} with missing task_id or question: {item}")
            continue

        print(f"Processing question {i}/{len(questions_data)}: {task_id}")

        try:
            detailed_result = agent.hybrid_system.process_query(question_text)
            submitted_answer = detailed_result.get("answer", "No response")

            if "FINAL ANSWER:" in submitted_answer:
                clean_answer = submitted_answer.split("FINAL ANSWER:")[-1].strip()
            else:
                clean_answer = submitted_answer.strip()

            provider = detailed_result.get("provider_used", "Unknown")
            processing_time = detailed_result.get("performance_metrics", {}).get("total_time", 0)

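            # Tally which backend handled this question, based on the provider
            # label returned by the hybrid system.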
if "LangGraph" in provider: |
|
if "Math" in provider: |
|
performance_stats["langgraph_math"] += 1 |
|
else: |
|
performance_stats["langgraph_retrieval"] += 1 |
|
elif "Agno" in provider: |
|
if "Research" in provider: |
|
performance_stats["agno_research"] += 1 |
|
else: |
|
performance_stats["agno_general"] += 1 |
|
|
|
performance_stats["total_processing_time"] += processing_time |
|
|
|
            answers_payload.append({"task_id": task_id, "submitted_answer": clean_answer})
            results_log.append({
                "Task ID": task_id,
                "Question": question_text[:100] + "..." if len(question_text) > 100 else question_text,
                "Submitted Answer": clean_answer,
                "Provider": provider,
                "Processing Time (s)": f"{processing_time:.2f}",
            })

            print(f"Question {i} processed successfully using {provider}")

        except Exception as e:
            print(f"Error running agent on task {task_id}: {e}")
            performance_stats["errors"] += 1
            results_log.append({
                "Task ID": task_id,
                "Question": question_text[:100] + "..." if len(question_text) > 100 else question_text,
                "Submitted Answer": f"AGENT ERROR: {e}",
                "Provider": "Error",
                "Processing Time (s)": "0.00",
            })

    if not answers_payload:
        print("Agent did not produce any answers to submit.")
        return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)

    avg_processing_time = performance_stats["total_processing_time"] / len(answers_payload)
    performance_summary = f"""
Performance Summary:
  • LangGraph Math: {performance_stats['langgraph_math']} queries
  • Agno Research: {performance_stats['agno_research']} queries
  • LangGraph Retrieval: {performance_stats['langgraph_retrieval']} queries
  • Agno General: {performance_stats['agno_general']} queries
  • Errors: {performance_stats['errors']} queries
  • Average Processing Time: {avg_processing_time:.2f}s
  • Total Processing Time: {performance_stats['total_processing_time']:.2f}s
"""
    print(performance_summary)

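    # Payload expected by the scoring endpoint: the Hugging Face username, a link
    # to this Space's code, and one {"task_id", "submitted_answer"} record per question.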
    submission_data = {
        "username": username.strip(),
        "agent_code": agent_code,
        "answers": answers_payload,
    }
    status_update = f"Hybrid Agent finished. Submitting {len(answers_payload)} answers for user '{username}'..."
    print(status_update)

print(f"π€ Submitting {len(answers_payload)} answers to: {submit_url}") |
|
try: |
|
response = requests.post(submit_url, json=submission_data, timeout=120) |
|
response.raise_for_status() |
|
result_data = response.json() |
|
|
|
final_status = ( |
|
f"π Submission Successful!\n" |
|
f"π€ User: {result_data.get('username')}\n" |
|
f"π Overall Score: {result_data.get('score', 'N/A')}% " |
|
f"({result_data.get('correct_count', '?')}/{result_data.get('total_attempted', '?')} correct)\n" |
|
f"π¬ Message: {result_data.get('message', 'No message received.')}\n" |
|
f"{performance_summary}" |
|
) |
|
print("β
Submission successful.") |
|
results_df = pd.DataFrame(results_log) |
|
return final_status, results_df |
|
|
|
    except requests.exceptions.HTTPError as e:
        error_detail = f"Server responded with status {e.response.status_code}."
        try:
            error_json = e.response.json()
            error_detail += f" Detail: {error_json.get('detail', e.response.text)}"
        except requests.exceptions.JSONDecodeError:
            error_detail += f" Response: {e.response.text[:500]}"
        status_message = f"Submission Failed: {error_detail}"
        print(status_message)
        results_df = pd.DataFrame(results_log)
        return status_message, results_df

    except requests.exceptions.Timeout:
        status_message = "Submission Failed: The request timed out."
        print(status_message)
        results_df = pd.DataFrame(results_log)
        return status_message, results_df

    except requests.exceptions.RequestException as e:
        status_message = f"Submission Failed: Network error - {e}"
        print(status_message)
        results_df = pd.DataFrame(results_log)
        return status_message, results_df

    except Exception as e:
        status_message = f"An unexpected error occurred during submission: {e}"
        print(status_message)
        results_df = pd.DataFrame(results_log)
        return status_message, results_df


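# Gradio UI: login button, run trigger, status textbox, and results table.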
with gr.Blocks(title="Enhanced Hybrid Agent Evaluation") as demo:
    gr.Markdown("# Enhanced Hybrid LangGraph + Agno Agent Evaluation Runner")
    gr.Markdown(
        """
## **Advanced AI Agent System**

This evaluation runner uses a **Hybrid LangGraph + Agno Agent System** that combines the strengths of both frameworks:

### **Intelligent Routing System**
- **Mathematical Queries** → LangGraph (Groq Llama 3.3 70B) - *Optimized for speed*
- **Complex Research** → Agno (Gemini 2.0 Flash-Lite) - *Optimized for reasoning*
- **Factual Retrieval** → LangGraph + FAISS Vector Store - *Optimized for accuracy*
- **General Queries** → Agno Multi-Agent System - *Optimized for comprehensiveness*

### **Performance Features**
- **Rate Limiting**: Intelligent rate management for free-tier models
- **Caching**: Query-level caching to speed up repeated requests
- **Fallback Systems**: Automatic provider switching on failures
- **Performance Tracking**: Real-time metrics and provider usage stats

### **Tools & Capabilities**
- Mathematical calculations (add, subtract, multiply, divide, modulus)
- Web search (Tavily, Wikipedia, ArXiv)
- FAISS vector database for similar-question retrieval
- Memory persistence across sessions

---

**Instructions:**
1. Log in to your Hugging Face account using the button below.
2. Click 'Run Evaluation & Submit All Answers' to start the evaluation.
3. Monitor real-time performance metrics and provider usage.
4. View your final score and detailed results.

**Note:** The hybrid system automatically routes each question type to the provider best suited for it, balancing speed and accuracy.
"""
    )

    gr.LoginButton()

    with gr.Row():
        run_button = gr.Button(
            "Run Evaluation & Submit All Answers",
            variant="primary",
            size="lg",
        )

    status_output = gr.Textbox(
        label="Run Status / Submission Result",
        lines=10,
        interactive=False,
        placeholder="Status updates will appear here...",
    )

    results_table = gr.DataFrame(
        label="Questions, Answers & Performance Metrics",
        wrap=True,
        height=400,
    )

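    # No inputs are wired here: Gradio supplies the gr.OAuthProfile argument
    # itself based on the handler's type annotation.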
    run_button.click(
        fn=run_and_submit_all,
        outputs=[status_output, results_table],
    )

    gr.Markdown(
        """
---
### **System Information**
- **Primary Models**: Groq Llama 3.3 70B, Gemini 2.0 Flash-Lite, NVIDIA Llama 3.1 70B
- **Frameworks**: LangGraph + Agno Hybrid Architecture
- **Vector Store**: FAISS with NVIDIA Embeddings
- **Rate Limiting**: Advanced rate management with exponential backoff
- **Memory**: Persistent agent memory with session summaries
"""
    )


if __name__ == "__main__":
    print("\n" + "=" * 80)
    print("ENHANCED HYBRID AGENT EVALUATION RUNNER")
    print("=" * 80)

    space_host_startup = os.getenv("SPACE_HOST")
    space_id_startup = os.getenv("SPACE_ID")

    if space_host_startup:
        print(f"SPACE_HOST found: {space_host_startup}")
        print(f"  Runtime URL: https://{space_host_startup}.hf.space")
    else:
        print("SPACE_HOST environment variable not found (running locally?).")

    if space_id_startup:
        print(f"SPACE_ID found: {space_id_startup}")
        print(f"  Repo URL: https://huggingface.co/spaces/{space_id_startup}")
        print(f"  Repo Tree URL: https://huggingface.co/spaces/{space_id_startup}/tree/main")
    else:
        print("SPACE_ID environment variable not found (running locally?).")

    print("\nSystem Features:")
    print("  • Hybrid LangGraph + Agno Architecture")
    print("  • Intelligent Query Routing")
    print("  • Performance Optimization")
    print("  • Advanced Rate Limiting")
    print("  • FAISS Vector Database")
    print("  • Multi-Provider Fallbacks")

    print("\n" + "=" * 80)
    print("Launching Enhanced Gradio Interface...")
    print("=" * 80 + "\n")

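    # debug=True keeps the process attached and surfaces errors in the console;
    # share=False disables the temporary public Gradio share link.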
    demo.launch(debug=True, share=False)