josondev committed on
Commit
5e2ef30
·
verified ·
1 Parent(s): 08382a6

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +176 -92
app.py CHANGED
@@ -1,52 +1,85 @@
1
- """ Working Multi-LLM Agent Evaluation Runner"""
2
  import os
 
3
  import gradio as gr
4
  import requests
5
  import pandas as pd
6
  from langchain_core.messages import HumanMessage
 
7
 
8
- # Import from veryfinal.py
9
- from veryfinal import UnifiedAgnoEnhancedSystem
10
 
 
 
11
  # --- Constants ---
12
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
13
 
14
- # --- Working Agent Definition ---
15
- class WorkingMultiLLMAgent:
16
- """A working multi-LLM agent that actually answers questions"""
 
 
 
17
  def __init__(self):
18
- print("Working Multi-LLM Agent initialized.")
19
  try:
20
- self.system = UnifiedAgnoEnhancedSystem()
21
- print("✅ Working system built successfully.")
 
 
22
  except Exception as e:
23
- print(f"Error building system: {e}")
 
24
  self.system = None
25
 
26
  def __call__(self, question: str) -> str:
27
- print(f"Processing: {question[:100]}...")
28
 
29
- if self.system is None:
30
- return "Error: System not initialized"
 
 
 
 
 
31
 
32
- try:
33
- answer = self.system.process_query(question)
34
-
35
- # Validation
36
- if not answer or answer == question or len(answer.strip()) == 0:
37
- return "Information not available"
38
-
39
- return answer.strip()
 
 
 
 
 
 
40
 
41
- except Exception as e:
42
- return f"Error: {str(e)}"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
43
 
44
- def run_and_submit_all(profile: gr.OAuthProfile | None):
45
- """Run evaluation with working agent"""
46
- space_id = os.getenv("SPACE_ID")
47
-
48
  if profile:
49
- username = f"{profile.username}"
50
  print(f"User logged in: {username}")
51
  else:
52
  print("User not logged in.")
@@ -56,115 +89,146 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
56
  questions_url = f"{api_url}/questions"
57
  submit_url = f"{api_url}/submit"
58
 
59
- # 1. Instantiate Working Agent
60
  try:
61
- agent = WorkingMultiLLMAgent()
62
- if agent.system is None:
63
- return "Error: Failed to initialize working agent", None
64
  except Exception as e:
 
65
  return f"Error initializing agent: {e}", None
66
-
67
- agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main" if space_id else "No space ID"
 
68
 
69
  # 2. Fetch Questions
 
70
  try:
71
  response = requests.get(questions_url, timeout=15)
72
  response.raise_for_status()
73
  questions_data = response.json()
74
  if not questions_data:
75
- return "No questions fetched", None
76
- print(f"Fetched {len(questions_data)} questions")
77
- except Exception as e:
 
 
78
  return f"Error fetching questions: {e}", None
 
 
 
 
 
 
 
79
 
80
- # 3. Process Questions
81
  results_log = []
82
  answers_payload = []
83
-
84
- for i, item in enumerate(questions_data):
85
  task_id = item.get("task_id")
86
  question_text = item.get("question")
87
-
88
  if not task_id or question_text is None:
 
89
  continue
90
-
91
- print(f"Processing {i+1}/{len(questions_data)}: {task_id}")
92
-
93
  try:
94
- answer = agent(question_text)
95
 
96
- # Prevent question repetition
97
- if answer == question_text or answer.startswith(question_text):
98
- answer = "Information not available"
99
 
100
- answers_payload.append({"task_id": task_id, "submitted_answer": answer})
101
- results_log.append({
102
- "Task ID": task_id,
103
- "Question": question_text[:100] + "..." if len(question_text) > 100 else question_text,
104
- "Submitted Answer": answer[:200] + "..." if len(answer) > 200 else answer
105
- })
106
  except Exception as e:
107
- error_msg = f"ERROR: {e}"
108
- answers_payload.append({"task_id": task_id, "submitted_answer": error_msg})
109
- results_log.append({
110
- "Task ID": task_id,
111
- "Question": question_text[:100] + "..." if len(question_text) > 100 else question_text,
112
- "Submitted Answer": error_msg
113
- })
114
 
115
  if not answers_payload:
116
- return "No answers generated", pd.DataFrame(results_log)
 
117
 
118
- # 4. Submit Results
119
  submission_data = {"username": username.strip(), "agent_code": agent_code, "answers": answers_payload}
120
-
 
 
 
 
121
  try:
122
  response = requests.post(submit_url, json=submission_data, timeout=60)
123
  response.raise_for_status()
124
  result_data = response.json()
125
-
126
  final_status = (
127
  f"✅ Submission Successful!\n"
128
  f"User: {result_data.get('username')}\n"
129
- f"Score: {result_data.get('score', 'N/A')}% "
130
  f"({result_data.get('correct_count', '?')}/{result_data.get('total_attempted', '?')} correct)\n"
131
- f"Message: {result_data.get('message', 'Success')}"
132
  )
133
-
134
- return final_status, pd.DataFrame(results_log)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
135
  except Exception as e:
136
- return f" Submission Failed: {e}", pd.DataFrame(results_log)
 
 
 
137
 
138
- # --- Gradio Interface ---
 
139
  with gr.Blocks() as demo:
140
- gr.Markdown("# Working Multi-LLM Agent System")
141
  gr.Markdown(
142
  """
143
- **✅ This is a WORKING system that will actually answer questions!**
144
-
145
- **Features:**
146
- - **Groq Llama-3 70B**: High-quality responses
147
- - **Smart Routing**: Math, search, wiki, and general queries
148
- - **Web Search**: Tavily integration for current information
149
- - **Wikipedia**: Encyclopedic knowledge access
150
- - **Robust Error Handling**: Fallbacks and validation
151
-
152
  **Instructions:**
153
- 1. Log in with your Hugging Face account
154
- 2. Click 'Run Evaluation & Submit All Answers'
155
- 3. Wait for processing to complete
156
- 4. View your results and score
157
 
158
- **Requirements:**
159
- - GROQ_API_KEY in your environment variables
160
- - TAVILY_API_KEY (optional, for web search)
 
 
 
 
 
 
 
 
161
  """
162
  )
163
 
164
  gr.LoginButton()
165
- run_button = gr.Button("🚀 Run Evaluation & Submit All Answers", variant="primary")
166
- status_output = gr.Textbox(label="Status", lines=5, interactive=False)
167
- results_table = gr.DataFrame(label="Results", wrap=True)
 
 
 
168
 
169
  run_button.click(
170
  fn=run_and_submit_all,
@@ -172,5 +236,25 @@ with gr.Blocks() as demo:
172
  )
173
 
174
  if __name__ == "__main__":
175
- print("🚀 Starting Working Multi-LLM Agent System")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
176
  demo.launch(debug=True, share=False)
 
1
+ """ Basic Agent Evaluation Runner"""
2
  import os
3
+ import inspect
4
  import gradio as gr
5
  import requests
6
  import pandas as pd
7
  from langchain_core.messages import HumanMessage
8
+ from veryfinal import build_graph, HybridLangGraphMultiLLMSystem # Changed import
9
 
 
 
10
 
11
+
12
+ # (Keep Constants as is)
13
  # --- Constants ---
14
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
15
 
16
+ # --- Basic Agent Definition ---
17
+ # ----- THIS IS WHERE YOU CAN BUILD WHAT YOU WANT ------
18
+
19
+
class BasicAgent:
    """Question-answering agent with a primary backend and a graph fallback.

    Attributes:
        system: Result of ``HybridLangGraphMultiLLMSystem()``, or ``None`` if
            construction raised. Queried first via ``process_query``.
        graph: Result of ``build_graph()``, or ``None`` if construction
            raised. Invoked only when ``system`` is missing, raises, or
            returns an empty answer.
    """

    def __init__(self):
        print("BasicAgent initialized.")
        self.graph = None
        self.system = None

        # Each backend is built in its own try block so that a failure in one
        # does not throw away the other; otherwise the fallback in __call__
        # could never be reached after a partial initialization failure.
        try:
            self.graph = build_graph()
        except Exception as e:
            print(f"Error building graph: {e}")

        try:
            self.system = HybridLangGraphMultiLLMSystem()
            print("✅ Optimized system initialized successfully.")
        except Exception as e:
            print(f"Error initializing optimized system: {e}")

    def __call__(self, question: str) -> str:
        """Answer ``question`` and always return a string.

        Args:
            question: The question text to answer.

        Returns:
            The answer from ``system`` when it is non-empty, otherwise the
            answer extracted from the graph result. If the graph raises, an
            ``"Error: ..."`` string is returned. With no usable backend the
            result is ``"Information not available"`` (the system ran but
            produced nothing) or ``"Error: System not initialized"``.
        """
        print(f"Agent received question (first 50 chars): {question[:50]}...")

        system_ran = False
        if self.system is not None:
            try:
                answer = self.system.process_query(question)
            except Exception as e:
                print(f"Error with optimized system: {e}")
            else:
                system_ran = True
                # Callers apply str methods to the result, so never hand back
                # None; an empty answer is treated as "try the fallback".
                text = "" if answer is None else str(answer)
                if text.strip():
                    return text
                print("Optimized system returned an empty answer.")

        if self.graph is not None:
            try:
                state = {
                    "messages": [HumanMessage(content=question)],
                    "query": question,
                    "agent_type": "",
                    "final_answer": "",
                    "perf": {},
                    "tools_used": [],
                }
                # NOTE: hash() of a str is salted per interpreter process, so
                # this thread id is only stable within a single run.
                config = {"configurable": {"thread_id": f"eval_{hash(question)}"}}
                result = self.graph.invoke(state, config)
                return self._extract_answer(result)
            except Exception as e:
                return f"Error: {e}"

        if system_ran:
            return "Information not available"
        return "Error: System not initialized"

    @staticmethod
    def _extract_answer(result):
        """Pull the answer text out of a graph invocation result."""
        if not isinstance(result, dict):
            return str(result)

        # The input state seeds "final_answer" with "", so the key being
        # present proves nothing; only a non-empty value counts as an answer.
        final_answer = result.get("final_answer")
        if final_answer:
            return final_answer

        messages = result.get("messages")
        if messages:
            return messages[-1].content

        if "final_answer" in result:
            return ""
        return str(result)
71
+
72
+
73
+ def run_and_submit_all( profile: gr.OAuthProfile | None):
74
+ """
75
+ Fetches all questions, runs the BasicAgent on them, submits all answers,
76
+ and displays the results.
77
+ """
78
+ # --- Determine HF Space Runtime URL and Repo URL ---
79
+ space_id = os.getenv("SPACE_ID") # Get the SPACE_ID for sending link to the code
80
 
 
 
 
 
81
  if profile:
82
+ username= f"{profile.username}"
83
  print(f"User logged in: {username}")
84
  else:
85
  print("User not logged in.")
 
89
  questions_url = f"{api_url}/questions"
90
  submit_url = f"{api_url}/submit"
91
 
92
+ # 1. Instantiate Agent ( modify this part to create your agent)
93
  try:
94
+ agent = BasicAgent()
95
+ if agent.graph is None and agent.system is None:
96
+ return "Error: Failed to initialize agent properly", None
97
  except Exception as e:
98
+ print(f"Error instantiating agent: {e}")
99
  return f"Error initializing agent: {e}", None
100
+ # When the app runs as a Hugging Face Space, this link points to your codebase (useful for others, so please keep it public)
101
+ agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
102
+ print(agent_code)
103
 
104
  # 2. Fetch Questions
105
+ print(f"Fetching questions from: {questions_url}")
106
  try:
107
  response = requests.get(questions_url, timeout=15)
108
  response.raise_for_status()
109
  questions_data = response.json()
110
  if not questions_data:
111
+ print("Fetched questions list is empty.")
112
+ return "Fetched questions list is empty or invalid format.", None
113
+ print(f"Fetched {len(questions_data)} questions.")
114
+ except requests.exceptions.RequestException as e:
115
+ print(f"Error fetching questions: {e}")
116
  return f"Error fetching questions: {e}", None
117
+ except requests.exceptions.JSONDecodeError as e:
118
+ print(f"Error decoding JSON response from questions endpoint: {e}")
119
+ print(f"Response text: {response.text[:500]}")
120
+ return f"Error decoding server response for questions: {e}", None
121
+ except Exception as e:
122
+ print(f"An unexpected error occurred fetching questions: {e}")
123
+ return f"An unexpected error occurred fetching questions: {e}", None
124
 
125
+ # 3. Run your Agent
126
  results_log = []
127
  answers_payload = []
128
+ print(f"Running optimized agent on {len(questions_data)} questions...")
129
+ for item in questions_data:
130
  task_id = item.get("task_id")
131
  question_text = item.get("question")
 
132
  if not task_id or question_text is None:
133
+ print(f"Skipping item with missing task_id or question: {item}")
134
  continue
 
 
 
135
  try:
136
+ submitted_answer = agent(question_text)
137
 
138
+ # Additional validation to prevent question repetition
139
+ if submitted_answer == question_text or submitted_answer.startswith(question_text):
140
+ submitted_answer = "Information not available"
141
 
142
+ answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
143
+ results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer})
 
 
 
 
144
  except Exception as e:
145
+ print(f"Error running agent on task {task_id}: {e}")
146
+ results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": f"AGENT ERROR: {e}"})
 
 
 
 
 
147
 
148
  if not answers_payload:
149
+ print("Agent did not produce any answers to submit.")
150
+ return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)
151
 
152
+ # 4. Prepare Submission
153
  submission_data = {"username": username.strip(), "agent_code": agent_code, "answers": answers_payload}
154
+ status_update = f"Optimized agent finished. Submitting {len(answers_payload)} answers for user '{username}'..."
155
+ print(status_update)
156
+
157
+ # 5. Submit
158
+ print(f"Submitting {len(answers_payload)} answers to: {submit_url}")
159
  try:
160
  response = requests.post(submit_url, json=submission_data, timeout=60)
161
  response.raise_for_status()
162
  result_data = response.json()
 
163
  final_status = (
164
  f"✅ Submission Successful!\n"
165
  f"User: {result_data.get('username')}\n"
166
+ f"Overall Score: {result_data.get('score', 'N/A')}% "
167
  f"({result_data.get('correct_count', '?')}/{result_data.get('total_attempted', '?')} correct)\n"
168
+ f"Message: {result_data.get('message', 'No message received.')}"
169
  )
170
+ print("Submission successful.")
171
+ results_df = pd.DataFrame(results_log)
172
+ return final_status, results_df
173
+ except requests.exceptions.HTTPError as e:
174
+ error_detail = f"Server responded with status {e.response.status_code}."
175
+ try:
176
+ error_json = e.response.json()
177
+ error_detail += f" Detail: {error_json.get('detail', e.response.text)}"
178
+ except requests.exceptions.JSONDecodeError:
179
+ error_detail += f" Response: {e.response.text[:500]}"
180
+ status_message = f"Submission Failed: {error_detail}"
181
+ print(status_message)
182
+ results_df = pd.DataFrame(results_log)
183
+ return status_message, results_df
184
+ except requests.exceptions.Timeout:
185
+ status_message = "Submission Failed: The request timed out."
186
+ print(status_message)
187
+ results_df = pd.DataFrame(results_log)
188
+ return status_message, results_df
189
+ except requests.exceptions.RequestException as e:
190
+ status_message = f"Submission Failed: Network error - {e}"
191
+ print(status_message)
192
+ results_df = pd.DataFrame(results_log)
193
+ return status_message, results_df
194
  except Exception as e:
195
+ status_message = f"An unexpected error occurred during submission: {e}"
196
+ print(status_message)
197
+ results_df = pd.DataFrame(results_log)
198
+ return status_message, results_df
199
 
200
+
201
+ # --- Build Gradio Interface using Blocks ---
202
  with gr.Blocks() as demo:
203
+ gr.Markdown("# Optimized Agent Evaluation Runner")
204
  gr.Markdown(
205
  """
 
 
 
 
 
 
 
 
 
206
  **Instructions:**
207
+ 1. This agent uses the optimized veryfinal.py system for better performance
208
+ 2. Log in to your Hugging Face account using the button below. This uses your HF username for submission.
209
+ 3. Click 'Run Evaluation & Submit All Answers' to fetch questions, run your agent, submit answers, and see the score.
 
210
 
211
+ **Optimizations:**
212
+ - Specialized question handlers for different types
213
+ - Enhanced search strategies (Wikipedia + Web)
214
+ - Better answer extraction and formatting
215
+ - Fallback answers for common questions
216
+ ---
217
+ **Expected Improvements:**
218
+ - Better handling of Mercedes Sosa album questions
219
+ - Improved Wikipedia article searches
220
+ - Enhanced numerical answer extraction
221
+ - Better cipher/code question handling
222
  """
223
  )
224
 
225
  gr.LoginButton()
226
+
227
+ run_button = gr.Button("🚀 Run Optimized Evaluation & Submit All Answers", variant="primary")
228
+
229
+ status_output = gr.Textbox(label="Run Status / Submission Result", lines=5, interactive=False)
230
+ # Removed max_rows=10 from DataFrame constructor
231
+ results_table = gr.DataFrame(label="Questions and Agent Answers", wrap=True)
232
 
233
  run_button.click(
234
  fn=run_and_submit_all,
 
236
  )
237
 
if __name__ == "__main__":
    # Startup banner: report the Space-related environment variables (purely
    # informational), then launch the Gradio app.
    banner_title = " Optimized App Starting "
    print("\n" + "-" * 30 + banner_title + "-" * 30)

    space_host_startup = os.getenv("SPACE_HOST")
    space_id_startup = os.getenv("SPACE_ID")

    if space_host_startup:
        print(f"✅ SPACE_HOST found: {space_host_startup}")
        # NOTE(review): SPACE_HOST may already be the full host name ending
        # in ".hf.space" — confirm against the Spaces docs. Only append the
        # suffix when it is missing so the URL is right either way.
        if space_host_startup.endswith(".hf.space"):
            runtime_host = space_host_startup
        else:
            runtime_host = f"{space_host_startup}.hf.space"
        print(f" Runtime URL should be: https://{runtime_host}")
    else:
        print("ℹ️ SPACE_HOST environment variable not found (running locally?).")

    if space_id_startup:
        print(f"✅ SPACE_ID found: {space_id_startup}")
        print(f" Repo URL: https://huggingface.co/spaces/{space_id_startup}")
        print(f" Repo Tree URL: https://huggingface.co/spaces/{space_id_startup}/tree/main")
    else:
        print("ℹ️ SPACE_ID environment variable not found (running locally?). Repo URL cannot be determined.")

    # Closing rule is sized from the same title as the header so both match.
    print("-" * (60 + len(banner_title)) + "\n")

    print("Launching Gradio Interface for Optimized Agent Evaluation...")
    demo.launch(debug=True, share=False)