josondev committed on
Commit
ee20f70
·
verified ·
1 Parent(s): 203942a

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +65 -150
app.py CHANGED
@@ -1,87 +1,48 @@
1
- """ Enhanced Multi-LLM Agent Evaluation Runner with Agno Integration"""
2
  import os
3
  import gradio as gr
4
  import requests
5
  import pandas as pd
6
  from langchain_core.messages import HumanMessage
7
 
8
- # Import the enhanced classes from veryfinal.py in the same directory
9
- try:
10
- from veryfinal import (
11
- build_graph,
12
- UnifiedAgnoEnhancedSystem,
13
- AgnoEnhancedAgentSystem,
14
- AgnoEnhancedModelManager
15
- )
16
- VERYFINAL_AVAILABLE = True
17
- except ImportError as e:
18
- print(f"Error importing from veryfinal.py: {e}")
19
- VERYFINAL_AVAILABLE = False
20
 
21
  # --- Constants ---
22
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
23
 
24
- # --- Enhanced Agent Definition ---
25
- class EnhancedMultiLLMAgent:
26
- """A multi-provider Agno agent with NVIDIA + open-source model integration."""
27
  def __init__(self):
28
- print("Enhanced Multi-LLM Agent with Agno Integration initialized.")
29
-
30
- if not VERYFINAL_AVAILABLE:
31
- print("Error: veryfinal.py not properly imported")
32
- self.system = None
33
- self.graph = None
34
- return
35
-
36
  try:
37
- # Use the unified Agno enhanced system
38
  self.system = UnifiedAgnoEnhancedSystem()
39
- self.graph = self.system.graph
40
-
41
- # Display system information
42
- if self.system.agno_system:
43
- info = self.system.get_system_info()
44
- print(f"System initialized with {info.get('total_models', 0)} models")
45
- if info.get('nvidia_available'):
46
- print("βœ… NVIDIA NIM models available")
47
- print(f"Active agents: {info.get('active_agents', [])}")
48
-
49
- print("Enhanced Agno Multi-LLM System built successfully.")
50
  except Exception as e:
51
- print(f"Error building enhanced system: {e}")
52
- self.graph = None
53
  self.system = None
54
 
55
  def __call__(self, question: str) -> str:
56
- print(f"Agent received question: {question[:100]}...")
57
 
58
  if self.system is None:
59
- return "Error: Agent not properly initialized"
60
 
61
  try:
62
- # Use the enhanced system's process_query method
63
  answer = self.system.process_query(question)
64
 
65
- # Additional validation
66
  if not answer or answer == question or len(answer.strip()) == 0:
67
  return "Information not available"
68
 
69
- # Clean up the answer
70
- answer = answer.strip()
71
-
72
- # Ensure proper formatting for evaluation
73
- if "FINAL ANSWER:" in answer:
74
- answer = answer.split("FINAL ANSWER:")[-1].strip()
75
-
76
- return answer
77
 
78
  except Exception as e:
79
- error_msg = f"Error: {str(e)}"
80
- print(error_msg)
81
- return error_msg
82
 
83
  def run_and_submit_all(profile: gr.OAuthProfile | None):
84
- """Fetch questions, run enhanced Agno agent, and submit answers."""
85
  space_id = os.getenv("SPACE_ID")
86
 
87
  if profile:
@@ -95,146 +56,115 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
95
  questions_url = f"{api_url}/questions"
96
  submit_url = f"{api_url}/submit"
97
 
98
- # 1. Instantiate Enhanced Agent
99
  try:
100
- agent = EnhancedMultiLLMAgent()
101
  if agent.system is None:
102
- return "Error: Failed to initialize enhanced agent properly", None
103
  except Exception as e:
104
- print(f"Error instantiating agent: {e}")
105
  return f"Error initializing agent: {e}", None
106
 
107
- agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main" if space_id else "No space ID available"
108
- print(f"Agent code URL: {agent_code}")
109
 
110
  # 2. Fetch Questions
111
- print(f"Fetching questions from: {questions_url}")
112
  try:
113
  response = requests.get(questions_url, timeout=15)
114
  response.raise_for_status()
115
  questions_data = response.json()
116
  if not questions_data:
117
- print("Fetched questions list is empty.")
118
- return "Fetched questions list is empty or invalid format.", None
119
- print(f"Fetched {len(questions_data)} questions.")
120
  except Exception as e:
121
- print(f"Error fetching questions: {e}")
122
  return f"Error fetching questions: {e}", None
123
 
124
- # 3. Run Enhanced Agno Agent
125
  results_log = []
126
  answers_payload = []
127
- print(f"Running Enhanced Agno Multi-LLM agent on {len(questions_data)} questions...")
128
 
129
  for i, item in enumerate(questions_data):
130
  task_id = item.get("task_id")
131
  question_text = item.get("question")
132
 
133
  if not task_id or question_text is None:
134
- print(f"Skipping item with missing task_id or question: {item}")
135
  continue
136
 
137
- print(f"Processing question {i+1}/{len(questions_data)}: {task_id}")
138
 
139
  try:
140
- submitted_answer = agent(question_text)
141
 
142
- # Additional validation to prevent question repetition
143
- if submitted_answer == question_text or submitted_answer.startswith(question_text):
144
- submitted_answer = "Information not available"
145
 
146
- answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
147
  results_log.append({
148
- "Task ID": task_id,
149
- "Question": question_text[:100] + "..." if len(question_text) > 100 else question_text,
150
- "Submitted Answer": submitted_answer[:200] + "..." if len(submitted_answer) > 200 else submitted_answer
151
  })
152
  except Exception as e:
153
- error_msg = f"AGENT ERROR: {e}"
154
- print(f"Error running agent on task {task_id}: {e}")
155
  answers_payload.append({"task_id": task_id, "submitted_answer": error_msg})
156
  results_log.append({
157
- "Task ID": task_id,
158
- "Question": question_text[:100] + "..." if len(question_text) > 100 else question_text,
159
  "Submitted Answer": error_msg
160
  })
161
 
162
  if not answers_payload:
163
- print("Agent did not produce any answers to submit.")
164
- return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)
165
 
166
- # 4. Prepare Submission
167
  submission_data = {"username": username.strip(), "agent_code": agent_code, "answers": answers_payload}
168
- status_update = f"Enhanced Agno Multi-LLM Agent finished. Submitting {len(answers_payload)} answers for user '{username}'..."
169
- print(status_update)
170
-
171
- # 5. Submit
172
- print(f"Submitting {len(answers_payload)} answers to: {submit_url}")
173
  try:
174
  response = requests.post(submit_url, json=submission_data, timeout=60)
175
  response.raise_for_status()
176
  result_data = response.json()
 
177
  final_status = (
178
- f"Submission Successful!\n"
179
  f"User: {result_data.get('username')}\n"
180
- f"Overall Score: {result_data.get('score', 'N/A')}% "
181
  f"({result_data.get('correct_count', '?')}/{result_data.get('total_attempted', '?')} correct)\n"
182
- f"Message: {result_data.get('message', 'No message received.')}"
183
  )
184
- print("Submission successful.")
185
- results_df = pd.DataFrame(results_log)
186
- return final_status, results_df
187
  except Exception as e:
188
- status_message = f"Submission Failed: {e}"
189
- print(status_message)
190
- results_df = pd.DataFrame(results_log)
191
- return status_message, results_df
192
 
193
- # --- Build Gradio Interface ---
194
  with gr.Blocks() as demo:
195
- gr.Markdown("# Enhanced Multi-LLM Agent with Agno + NVIDIA Integration")
196
  gr.Markdown(
197
  """
198
- **Instructions:**
199
- 1. Log in to your Hugging Face account using the button below.
200
- 2. Click 'Run Evaluation & Submit All Answers' to fetch questions, run your agent, submit answers, and see the score.
201
-
202
- **Enhanced Agent Features:**
203
- - **NVIDIA NIM Models**: Enterprise-grade optimized models for maximum accuracy
204
- - **Open-Source Models**: Groq, Ollama, Together AI, Anyscale, Hugging Face
205
- - **Specialized Agents**: Enterprise research, advanced math, coding, fast response
206
- - **Intelligent Routing**: Automatically selects best model/agent for each task
207
- - **Advanced Tools**: DuckDuckGo search, Wikipedia, calculator, reasoning tools
208
- - **Agno Framework**: Professional agent framework with memory and tool integration
209
 
210
- **Available Model Providers:**
211
- - **NVIDIA NIM**: meta/llama3-70b-instruct, meta/codellama-70b-instruct, etc.
212
- - **Groq (Free)**: llama3-70b-8192, llama3-8b-8192, mixtral-8x7b-32768
213
- - **Ollama (Local)**: llama3, mistral, phi3, codellama, gemma, qwen
214
- - **Together AI**: Meta-Llama models, Mistral, Qwen
215
- - **Anyscale**: Enterprise hosting for open-source models
216
- - **Hugging Face**: Direct model access
217
 
218
- **Routing Examples:**
219
- - Enterprise: "Enterprise analysis of quantum computing" β†’ NVIDIA NIM
220
- - Math: "Calculate 25 Γ— 17" β†’ Advanced Math Agent
221
- - Code: "Write Python factorial function" β†’ Advanced Coding Agent
222
- - Research: "Find Mercedes Sosa discography" β†’ Enterprise Research Agent
223
- - Quick: "Capital of France?" β†’ Fast Response Agent
224
 
225
- **Setup Requirements:**
226
- - NVIDIA_API_KEY for enterprise models (optional)
227
- - GROQ_API_KEY for free tier models
228
- - Other API keys optional for additional providers
229
  """
230
  )
231
 
232
  gr.LoginButton()
233
-
234
- run_button = gr.Button("Run Evaluation & Submit All Answers", variant="primary")
235
-
236
- status_output = gr.Textbox(label="Run Status / Submission Result", lines=5, interactive=False)
237
- results_table = gr.DataFrame(label="Questions and Agent Answers", wrap=True)
238
 
239
  run_button.click(
240
  fn=run_and_submit_all,
@@ -242,20 +172,5 @@ with gr.Blocks() as demo:
242
  )
243
 
244
  if __name__ == "__main__":
245
- print("\n" + "-"*30 + " Enhanced Agno Multi-LLM Agent Starting " + "-"*30)
246
-
247
- # Display system status
248
- if VERYFINAL_AVAILABLE:
249
- try:
250
- test_system = UnifiedAgnoEnhancedSystem()
251
- info = test_system.get_system_info()
252
- print(f"βœ… System ready with {info.get('total_models', 0)} models")
253
- print(f"πŸ“Š Model breakdown: {len(info.get('model_breakdown', {}).get('nvidia_models', []))} NVIDIA, "
254
- f"{len(info.get('model_breakdown', {}).get('groq_models', []))} Groq, "
255
- f"{len(info.get('model_breakdown', {}).get('ollama_models', []))} Ollama")
256
- except Exception as e:
257
- print(f"⚠️ System initialization warning: {e}")
258
- else:
259
- print("❌ veryfinal.py not properly imported")
260
-
261
  demo.launch(debug=True, share=False)
 
1
+ """ Working Multi-LLM Agent Evaluation Runner"""
2
  import os
3
  import gradio as gr
4
  import requests
5
  import pandas as pd
6
  from langchain_core.messages import HumanMessage
7
 
8
+ # Import from veryfinal.py
9
+ from veryfinal import UnifiedAgnoEnhancedSystem
 
 
 
 
 
 
 
 
 
 
10
 
11
  # --- Constants ---
12
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
13
 
14
+ # --- Working Agent Definition ---
15
class WorkingMultiLLMAgent:
    """Callable agent that forwards questions to ``UnifiedAgnoEnhancedSystem``.

    Construction never raises: if the backing system cannot be built, the
    failure is printed and ``self.system`` is left as ``None`` so that callers
    can check it before starting an evaluation run.
    """

    def __init__(self):
        """Build the backing system, recording ``None`` on failure."""
        print("Working Multi-LLM Agent initialized.")
        try:
            self.system = UnifiedAgnoEnhancedSystem()
            print("✅ Working system built successfully.")
        except Exception as e:
            print(f"❌ Error building system: {e}")
            self.system = None

    def __call__(self, question: str) -> str:
        """Answer a single question.

        Args:
            question: The question text to pass to the backing system.

        Returns:
            The stripped answer text; ``"Information not available"`` when the
            system returns nothing, only whitespace, or merely echoes the
            question; or an ``"Error: ..."`` string when the system is missing
            or ``process_query`` raises.
        """
        print(f"Processing: {question[:100]}...")

        if self.system is None:
            return "Error: System not initialized"

        # Only the backend call can fail for reasons outside this method,
        # so it is the only statement guarded by the broad handler.
        try:
            answer = self.system.process_query(question)
        except Exception as e:
            return f"Error: {str(e)}"

        if not answer:
            return "Information not available"

        # A non-string result would otherwise blow up on ``.strip()`` and be
        # reported as an error instead of being submitted as an answer.
        if not isinstance(answer, str):
            answer = str(answer)

        # Normalise before comparing so that an echoed question padded with
        # whitespace is still recognised as a non-answer.
        answer = answer.strip()
        if not answer or answer == question.strip():
            return "Information not available"

        return answer
 
 
43
 
44
  def run_and_submit_all(profile: gr.OAuthProfile | None):
45
+ """Run evaluation with working agent"""
46
  space_id = os.getenv("SPACE_ID")
47
 
48
  if profile:
 
56
  questions_url = f"{api_url}/questions"
57
  submit_url = f"{api_url}/submit"
58
 
59
+ # 1. Instantiate Working Agent
60
  try:
61
+ agent = WorkingMultiLLMAgent()
62
  if agent.system is None:
63
+ return "Error: Failed to initialize working agent", None
64
  except Exception as e:
 
65
  return f"Error initializing agent: {e}", None
66
 
67
+ agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main" if space_id else "No space ID"
 
68
 
69
  # 2. Fetch Questions
 
70
  try:
71
  response = requests.get(questions_url, timeout=15)
72
  response.raise_for_status()
73
  questions_data = response.json()
74
  if not questions_data:
75
+ return "No questions fetched", None
76
+ print(f"βœ… Fetched {len(questions_data)} questions")
 
77
  except Exception as e:
 
78
  return f"Error fetching questions: {e}", None
79
 
80
+ # 3. Process Questions
81
  results_log = []
82
  answers_payload = []
 
83
 
84
  for i, item in enumerate(questions_data):
85
  task_id = item.get("task_id")
86
  question_text = item.get("question")
87
 
88
  if not task_id or question_text is None:
 
89
  continue
90
 
91
+ print(f"Processing {i+1}/{len(questions_data)}: {task_id}")
92
 
93
  try:
94
+ answer = agent(question_text)
95
 
96
+ # Prevent question repetition
97
+ if answer == question_text or answer.startswith(question_text):
98
+ answer = "Information not available"
99
 
100
+ answers_payload.append({"task_id": task_id, "submitted_answer": answer})
101
  results_log.append({
102
+ "Task ID": task_id,
103
+ "Question": question_text[:100] + "..." if len(question_text) > 100 else question_text,
104
+ "Submitted Answer": answer[:200] + "..." if len(answer) > 200 else answer
105
  })
106
  except Exception as e:
107
+ error_msg = f"ERROR: {e}"
 
108
  answers_payload.append({"task_id": task_id, "submitted_answer": error_msg})
109
  results_log.append({
110
+ "Task ID": task_id,
111
+ "Question": question_text[:100] + "..." if len(question_text) > 100 else question_text,
112
  "Submitted Answer": error_msg
113
  })
114
 
115
  if not answers_payload:
116
+ return "No answers generated", pd.DataFrame(results_log)
 
117
 
118
+ # 4. Submit Results
119
  submission_data = {"username": username.strip(), "agent_code": agent_code, "answers": answers_payload}
120
+
 
 
 
 
121
  try:
122
  response = requests.post(submit_url, json=submission_data, timeout=60)
123
  response.raise_for_status()
124
  result_data = response.json()
125
+
126
  final_status = (
127
+ f"βœ… Submission Successful!\n"
128
  f"User: {result_data.get('username')}\n"
129
+ f"Score: {result_data.get('score', 'N/A')}% "
130
  f"({result_data.get('correct_count', '?')}/{result_data.get('total_attempted', '?')} correct)\n"
131
+ f"Message: {result_data.get('message', 'Success')}"
132
  )
133
+
134
+ return final_status, pd.DataFrame(results_log)
 
135
  except Exception as e:
136
+ return f"❌ Submission Failed: {e}", pd.DataFrame(results_log)
 
 
 
137
 
138
+ # --- Gradio Interface ---
139
  with gr.Blocks() as demo:
140
+ gr.Markdown("# Working Multi-LLM Agent System")
141
  gr.Markdown(
142
  """
143
+ **βœ… This is a WORKING system that will actually answer questions!**
 
 
 
 
 
 
 
 
 
 
144
 
145
+ **Features:**
146
+ - **Groq Llama-3 70B**: High-quality responses
147
+ - **Smart Routing**: Math, search, wiki, and general queries
148
+ - **Web Search**: Tavily integration for current information
149
+ - **Wikipedia**: Encyclopedic knowledge access
150
+ - **Robust Error Handling**: Fallbacks and validation
 
151
 
152
+ **Instructions:**
153
+ 1. Log in with your Hugging Face account
154
+ 2. Click 'Run Evaluation & Submit All Answers'
155
+ 3. Wait for processing to complete
156
+ 4. View your results and score
 
157
 
158
+ **Requirements:**
159
+ - GROQ_API_KEY in your environment variables
160
+ - TAVILY_API_KEY (optional, for web search)
 
161
  """
162
  )
163
 
164
  gr.LoginButton()
165
+ run_button = gr.Button("πŸš€ Run Evaluation & Submit All Answers", variant="primary")
166
+ status_output = gr.Textbox(label="Status", lines=5, interactive=False)
167
+ results_table = gr.DataFrame(label="Results", wrap=True)
 
 
168
 
169
  run_button.click(
170
  fn=run_and_submit_all,
 
172
  )
173
 
174
  if __name__ == "__main__":
175
+ print("πŸš€ Starting Working Multi-LLM Agent System")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
176
  demo.launch(debug=True, share=False)