""" Working Multi-LLM Agent Evaluation Runner"""
import os
import gradio as gr
import requests
import pandas as pd
from langchain_core.messages import HumanMessage
# Import from veryfinal.py
from veryfinal import UnifiedAgnoEnhancedSystem
# --- Constants ---
# Scoring service for the HF Agents course (unit 4); used to fetch questions and submit answers.
DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
# --- Working Agent Definition ---
class WorkingMultiLLMAgent:
    """A working multi-LLM agent that actually answers questions.

    Thin wrapper around ``UnifiedAgnoEnhancedSystem`` (from ``veryfinal``).
    If construction fails, ``self.system`` is left as ``None`` and calls
    return an error string instead of raising.
    """

    def __init__(self):
        print("Working Multi-LLM Agent initialized.")
        try:
            self.system = UnifiedAgnoEnhancedSystem()
            print("✅ Working system built successfully.")
        except Exception as e:
            # Keep the app alive even if the backend can't be built;
            # __call__ reports the failure to the user instead.
            print(f"❌ Error building system: {e}")
            self.system = None

    def __call__(self, question: str) -> str:
        """Answer ``question`` via the underlying system.

        Never raises: backend failures are returned as ``"Error: ..."``
        strings, and empty / question-echoing answers are replaced with
        ``"Information not available"``.
        """
        print(f"Processing: {question[:100]}...")
        if self.system is None:
            return "Error: System not initialized"
        try:
            answer = self.system.process_query(question)
            # Validation: reject empty answers or the model echoing the question.
            if not answer or answer == question or len(answer.strip()) == 0:
                return "Information not available"
            return answer.strip()
        except Exception as e:
            return f"Error: {str(e)}"
def run_and_submit_all(profile: gr.OAuthProfile | None):
    """Fetch all evaluation questions, answer them, and submit the results.

    Args:
        profile: OAuth profile auto-injected by Gradio's login button;
            ``None`` when the user is not logged in.

    Returns:
        A ``(status_message, results_dataframe)`` tuple. The DataFrame is
        ``None`` when we bail out before any question is answered.
    """
    space_id = os.getenv("SPACE_ID")
    if profile:
        username = f"{profile.username}"
        print(f"User logged in: {username}")
    else:
        print("User not logged in.")
        return "Please Login to Hugging Face with the button.", None

    api_url = DEFAULT_API_URL
    questions_url = f"{api_url}/questions"
    submit_url = f"{api_url}/submit"

    # 1. Instantiate Working Agent
    try:
        agent = WorkingMultiLLMAgent()
        if agent.system is None:
            return "Error: Failed to initialize working agent", None
    except Exception as e:
        return f"Error initializing agent: {e}", None
    # Link to this Space's code so the scorer can inspect the submission.
    agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main" if space_id else "No space ID"

    # 2. Fetch Questions
    try:
        response = requests.get(questions_url, timeout=15)
        response.raise_for_status()
        questions_data = response.json()
        if not questions_data:
            return "No questions fetched", None
        print(f"✅ Fetched {len(questions_data)} questions")
    except Exception as e:
        return f"Error fetching questions: {e}", None

    # 3. Process Questions
    results_log = []
    answers_payload = []
    for i, item in enumerate(questions_data):
        task_id = item.get("task_id")
        question_text = item.get("question")
        if not task_id or question_text is None:
            continue
        print(f"Processing {i+1}/{len(questions_data)}: {task_id}")
        try:
            answer = agent(question_text)
            # Prevent question repetition from being submitted as an answer.
            if answer == question_text or answer.startswith(question_text):
                answer = "Information not available"
            answers_payload.append({"task_id": task_id, "submitted_answer": answer})
            results_log.append({
                "Task ID": task_id,
                # Truncate long text for display only; the full answer is submitted.
                "Question": question_text[:100] + "..." if len(question_text) > 100 else question_text,
                "Submitted Answer": answer[:200] + "..." if len(answer) > 200 else answer
            })
        except Exception as e:
            error_msg = f"ERROR: {e}"
            answers_payload.append({"task_id": task_id, "submitted_answer": error_msg})
            results_log.append({
                "Task ID": task_id,
                "Question": question_text[:100] + "..." if len(question_text) > 100 else question_text,
                "Submitted Answer": error_msg
            })
    if not answers_payload:
        return "No answers generated", pd.DataFrame(results_log)

    # 4. Submit Results
    submission_data = {"username": username.strip(), "agent_code": agent_code, "answers": answers_payload}
    try:
        response = requests.post(submit_url, json=submission_data, timeout=60)
        response.raise_for_status()
        result_data = response.json()
        final_status = (
            f"✅ Submission Successful!\n"
            f"User: {result_data.get('username')}\n"
            f"Score: {result_data.get('score', 'N/A')}% "
            f"({result_data.get('correct_count', '?')}/{result_data.get('total_attempted', '?')} correct)\n"
            f"Message: {result_data.get('message', 'Success')}"
        )
        return final_status, pd.DataFrame(results_log)
    except Exception as e:
        return f"❌ Submission Failed: {e}", pd.DataFrame(results_log)
# --- Gradio Interface ---
# Builds the app layout: description, login, one run button, and two outputs.
with gr.Blocks() as demo:
    gr.Markdown("# Working Multi-LLM Agent System")
    gr.Markdown(
        """
        **✅ This is a WORKING system that will actually answer questions!**

        **Features:**
        - **Groq Llama-3 70B**: High-quality responses
        - **Smart Routing**: Math, search, wiki, and general queries
        - **Web Search**: Tavily integration for current information
        - **Wikipedia**: Encyclopedic knowledge access
        - **Robust Error Handling**: Fallbacks and validation

        **Instructions:**
        1. Log in with your Hugging Face account
        2. Click 'Run Evaluation & Submit All Answers'
        3. Wait for processing to complete
        4. View your results and score

        **Requirements:**
        - GROQ_API_KEY in your environment variables
        - TAVILY_API_KEY (optional, for web search)
        """
    )
    gr.LoginButton()
    run_button = gr.Button("🚀 Run Evaluation & Submit All Answers", variant="primary")
    status_output = gr.Textbox(label="Status", lines=5, interactive=False)
    results_table = gr.DataFrame(label="Results", wrap=True)
    # No explicit `inputs`: Gradio injects the OAuth profile because
    # run_and_submit_all declares a gr.OAuthProfile parameter.
    run_button.click(
        fn=run_and_submit_all,
        outputs=[status_output, results_table]
    )
if __name__ == "__main__":
    # Local entry point; share=False keeps the server private to this host.
    print("🚀 Starting Working Multi-LLM Agent System")
    demo.launch(debug=True, share=False)
|