josondev committed on
Commit 2d9c7ce · verified · 1 Parent(s): ca98093

Update app.py

Files changed (1)
  1. app.py +55 -91
app.py CHANGED
@@ -1,4 +1,4 @@
-""" Enhanced Multi-LLM Agent Evaluation Runner - CORRECTED VERSION"""
 import os
 import gradio as gr
 import requests
@@ -10,76 +10,54 @@ from veryfinal import build_graph
 DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"

 # --- Enhanced Agent Definition ---
-class EnhancedMultiLLMAgent:
-    """A multi-provider LangGraph agent with proper response handling."""
     def __init__(self):
-        print("Enhanced Multi-LLM Agent initialized.")
         try:
             self.graph = build_graph(provider="groq")
-            print("Multi-LLM Graph built successfully.")
         except Exception as e:
             print(f"Error building graph: {e}")
             self.graph = None

     def __call__(self, question: str) -> str:
-        print(f"Agent received question: {question[:100]}...")

         if self.graph is None:
             return "Error: Agent not properly initialized"

-        # Create complete state structure
-        state = {
-            "messages": [HumanMessage(content=question)],
-            "query": question, # Critical: this must match the question
-            "agent_type": "",
-            "final_answer": "",
-            "perf": {},
-            "agno_resp": ""
-        }
-        # Always provide the required config with thread_id
-        config = {"configurable": {"thread_id": f"eval_{hash(question)}"}}
-
         try:
-            result = self.graph.invoke(state, config)

-            # CORRECTED: Proper response extraction
-            if isinstance(result, dict):
-                # First try to get final_answer from the state
-                if 'final_answer' in result and result['final_answer']:
-                    answer = result['final_answer']
-                # Fallback to messages if final_answer is empty
-                elif 'messages' in result and result['messages']:
-                    last_message = result['messages'][-1]
-                    if hasattr(last_message, 'content'):
-                        answer = last_message.content
-                    else:
-                        answer = str(last_message)
                 else:
-                    answer = str(result)
             else:
-                answer = str(result)
-
-            # Clean the answer
-            answer = answer.strip()
-
-            # CRITICAL FIX: Ensure we don't return the question as answer
-            if answer == question or answer.startswith(question):
                 return "Information not available"
-
-            # Extract final answer if present
-            if "FINAL ANSWER:" in answer:
-                answer = answer.split("FINAL ANSWER:")[-1].strip()
-
-            # Additional validation
-            if not answer or len(answer.strip()) == 0:
-                return "No answer generated"
-
-            return answer

         except Exception as e:
-            error_msg = f"Error: {str(e)}"
-            print(error_msg)
-            return error_msg
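The core behavioral change in this hunk is how the graph is invoked: the removed lines build a full custom state dict with bookkeeping fields, while the replacement (in the added lines further down) passes only a `messages` list. A minimal sketch of the two styles, assuming `graph` is the object returned by `build_graph` (variable names here are illustrative, not part of the committed file):

```python
from langchain_core.messages import HumanMessage

question = "What is 2 + 2?"
config = {"configurable": {"thread_id": f"eval_{hash(question)}"}}

# Removed approach: a hand-built state dict with extra bookkeeping fields.
old_state = {
    "messages": [HumanMessage(content=question)],
    "query": question,
    "agent_type": "",
    "final_answer": "",
    "perf": {},
    "agno_resp": "",
}
# result = graph.invoke(old_state, config)

# New approach (see the '+' lines in the second half of this diff):
# only the messages key is passed, plus the thread_id config.
# result = graph.invoke({"messages": [HumanMessage(content=question)]}, config)
```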

 def run_and_submit_all(profile: gr.OAuthProfile | None):
     """Fetch questions, run agent, and submit answers."""
@@ -98,7 +76,7 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):

     # 1. Instantiate Agent
     try:
-        agent = EnhancedMultiLLMAgent()
         if agent.graph is None:
             return "Error: Failed to initialize agent properly", None
     except Exception as e:
@@ -106,7 +84,6 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
         return f"Error initializing agent: {e}", None

     agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main" if space_id else "No space ID available"
-    print(f"Agent code URL: {agent_code}")

     # 2. Fetch Questions
     print(f"Fetching questions from: {questions_url}")
@@ -115,35 +92,27 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
         response.raise_for_status()
         questions_data = response.json()
         if not questions_data:
-            print("Fetched questions list is empty.")
             return "Fetched questions list is empty or invalid format.", None
         print(f"Fetched {len(questions_data)} questions.")
     except Exception as e:
-        print(f"Error fetching questions: {e}")
         return f"Error fetching questions: {e}", None

-    # 3. Run your Agent
     results_log = []
     answers_payload = []
-    print(f"Running Enhanced Multi-LLM agent on {len(questions_data)} questions...")

     for i, item in enumerate(questions_data):
         task_id = item.get("task_id")
         question_text = item.get("question")

         if not task_id or question_text is None:
-            print(f"Skipping item with missing task_id or question: {item}")
             continue

         print(f"Processing question {i+1}/{len(questions_data)}: {task_id}")

         try:
             submitted_answer = agent(question_text)
-
-            # Additional validation to prevent question repetition
-            if submitted_answer == question_text or submitted_answer.startswith(question_text):
-                submitted_answer = "Information not available"
-
             answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
             results_log.append({
                 "Task ID": task_id,
@@ -152,7 +121,6 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
             })
         except Exception as e:
             error_msg = f"AGENT ERROR: {e}"
-            print(f"Error running agent on task {task_id}: {e}")
             answers_payload.append({"task_id": task_id, "submitted_answer": error_msg})
             results_log.append({
                 "Task ID": task_id,
@@ -161,16 +129,12 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
             })

     if not answers_payload:
-        print("Agent did not produce any answers to submit.")
         return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)

-    # 4. Prepare Submission
     submission_data = {"username": username.strip(), "agent_code": agent_code, "answers": answers_payload}
-    status_update = f"Enhanced Multi-LLM Agent finished. Submitting {len(answers_payload)} answers for user '{username}'..."
-    print(status_update)
-
-    # 5. Submit
-    print(f"Submitting {len(answers_payload)} answers to: {submit_url}")
     try:
         response = requests.post(submit_url, json=submission_data, timeout=60)
         response.raise_for_status()
@@ -182,30 +146,30 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
             f"({result_data.get('correct_count', '?')}/{result_data.get('total_attempted', '?')} correct)\n"
             f"Message: {result_data.get('message', 'No message received.')}"
         )
-        print("Submission successful.")
-        results_df = pd.DataFrame(results_log)
-        return final_status, results_df
     except Exception as e:
-        status_message = f"Submission Failed: {e}"
-        print(status_message)
-        results_df = pd.DataFrame(results_log)
-        return status_message, results_df

-# --- Build Gradio Interface ---
 with gr.Blocks() as demo:
-    gr.Markdown("# Enhanced Multi-LLM Agent - CORRECTED VERSION")
     gr.Markdown(
         """
-        **Instructions:**
-        1. Log in to your Hugging Face account using the button below.
-        2. Click 'Run Evaluation & Submit All Answers' to fetch questions, run your agent, submit answers, and see the score.

-        **FIXES APPLIED:**
-        - ✅ Proper response extraction from graph state
-        - ✅ Prevention of question repetition as answer
-        - ✅ Enhanced prompt engineering for better responses
-        - ✅ Improved error handling and validation
-        - ✅ Search-enhanced processing for information retrieval
         """
     )

@@ -220,5 +184,5 @@ with gr.Blocks() as demo:
     )

 if __name__ == "__main__":
-    print("\n" + "-"*30 + " Enhanced Multi-LLM Agent CORRECTED Starting " + "-"*30)
     demo.launch(debug=True, share=False)
 
+""" Enhanced LangGraph Agent Evaluation Runner - Final Version"""
 import os
 import gradio as gr
 import requests
 
 DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"

 # --- Enhanced Agent Definition ---
+class EnhancedLangGraphAgent:
+    """Enhanced LangGraph agent with proper response handling."""
     def __init__(self):
+        print("Enhanced LangGraph Agent initialized.")
         try:
             self.graph = build_graph(provider="groq")
+            print("LangGraph built successfully.")
         except Exception as e:
             print(f"Error building graph: {e}")
             self.graph = None

     def __call__(self, question: str) -> str:
+        print(f"Processing: {question[:100]}...")

         if self.graph is None:
             return "Error: Agent not properly initialized"

         try:
+            # Create messages and config
+            messages = [HumanMessage(content=question)]
+            config = {"configurable": {"thread_id": f"eval_{hash(question)}"}}
+
+            # Invoke the graph
+            result = self.graph.invoke({"messages": messages}, config)

+            # Extract the final answer
+            if result and "messages" in result and result["messages"]:
+                final_message = result["messages"][-1]
+                if hasattr(final_message, 'content'):
+                    answer = final_message.content
                 else:
+                    answer = str(final_message)
+
+                # Clean up the answer
+                if "FINAL ANSWER:" in answer:
+                    answer = answer.split("FINAL ANSWER:")[-1].strip()
+
+                # Validate the answer
+                if not answer or answer == question or len(answer.strip()) == 0:
+                    return "Information not available"
+
+                return answer.strip()
             else:
                 return "Information not available"

         except Exception as e:
+            print(f"Error processing question: {e}")
+            return f"Error: {str(e)}"
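The post-processing inside `__call__` is easier to follow outside the diff; the helper below restates the same cleanup and validation steps as a standalone sketch (the function name `clean_answer` is illustrative and not part of the committed file):

```python
def clean_answer(raw: str, question: str) -> str:
    """Mirror of the cleanup done in EnhancedLangGraphAgent.__call__ (illustrative)."""
    answer = raw
    # Keep only the text after the last "FINAL ANSWER:" marker, if present.
    if "FINAL ANSWER:" in answer:
        answer = answer.split("FINAL ANSWER:")[-1].strip()
    # Reject empty output or an answer that merely echoes the question.
    if not answer or answer == question or len(answer.strip()) == 0:
        return "Information not available"
    return answer.strip()


print(clean_answer("FINAL ANSWER: Paris", "Capital of France?"))  # -> Paris
print(clean_answer("", "Capital of France?"))                     # -> Information not available
```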
 

 def run_and_submit_all(profile: gr.OAuthProfile | None):
     """Fetch questions, run agent, and submit answers."""
 

     # 1. Instantiate Agent
     try:
+        agent = EnhancedLangGraphAgent()
         if agent.graph is None:
             return "Error: Failed to initialize agent properly", None
     except Exception as e:
 
         return f"Error initializing agent: {e}", None

     agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main" if space_id else "No space ID available"

     # 2. Fetch Questions
     print(f"Fetching questions from: {questions_url}")
 
         response.raise_for_status()
         questions_data = response.json()
         if not questions_data:
             return "Fetched questions list is empty or invalid format.", None
         print(f"Fetched {len(questions_data)} questions.")
     except Exception as e:
         return f"Error fetching questions: {e}", None

+    # 3. Run Agent
     results_log = []
     answers_payload = []
+    print(f"Running Enhanced LangGraph agent on {len(questions_data)} questions...")

     for i, item in enumerate(questions_data):
         task_id = item.get("task_id")
         question_text = item.get("question")

         if not task_id or question_text is None:
             continue

         print(f"Processing question {i+1}/{len(questions_data)}: {task_id}")

         try:
             submitted_answer = agent(question_text)
             answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
             results_log.append({
                 "Task ID": task_id,

             })
         except Exception as e:
             error_msg = f"AGENT ERROR: {e}"
             answers_payload.append({"task_id": task_id, "submitted_answer": error_msg})
             results_log.append({
                 "Task ID": task_id,

             })

     if not answers_payload:
         return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)

+    # 4. Submit
     submission_data = {"username": username.strip(), "agent_code": agent_code, "answers": answers_payload}
+    print(f"Submitting {len(answers_payload)} answers...")
+
     try:
         response = requests.post(submit_url, json=submission_data, timeout=60)
         response.raise_for_status()
 
             f"({result_data.get('correct_count', '?')}/{result_data.get('total_attempted', '?')} correct)\n"
             f"Message: {result_data.get('message', 'No message received.')}"
         )
+        return final_status, pd.DataFrame(results_log)
     except Exception as e:
+        return f"Submission Failed: {e}", pd.DataFrame(results_log)
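For context, the submission is a single JSON POST built from the fields assembled above. A sketch of the payload shape and response handling, assuming `submit_url` points at the scoring Space's submit endpoint (the exact path is constructed outside the lines shown in this diff, and the values below are hypothetical):

```python
import requests

# Assumption: the scoring endpoint lives under DEFAULT_API_URL; the real
# submit_url is built elsewhere in app.py and is not shown in this diff.
submit_url = "https://agents-course-unit4-scoring.hf.space/submit"

submission_data = {
    "username": "example-user",   # hypothetical values
    "agent_code": "https://huggingface.co/spaces/example-user/agent/tree/main",
    "answers": [{"task_id": "task-001", "submitted_answer": "Paris"}],
}

response = requests.post(submit_url, json=submission_data, timeout=60)
response.raise_for_status()
result_data = response.json()
print(result_data.get("correct_count", "?"), "/", result_data.get("total_attempted", "?"))
print(result_data.get("message", "No message received."))
```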
 
 
 

+# --- Gradio Interface ---
 with gr.Blocks() as demo:
+    gr.Markdown("# Enhanced LangGraph Agent - Final Version")
     gr.Markdown(
         """
+        **Features:**
+        - ✅ Proper LangGraph structure with tool integration
+        - ✅ Multi-LLM support (Groq, Google, HuggingFace)
+        - ✅ Enhanced search capabilities (Wikipedia, Tavily, ArXiv)
+        - ✅ Mathematical tools for calculations
+        - ✅ Vector store integration for similar questions
+        - ✅ Proper response formatting and validation
+        - ✅ Error handling and fallback mechanisms

+        **Tools Available:**
+        - Mathematical operations (add, subtract, multiply, divide, modulus)
+        - Wikipedia search for encyclopedic information
+        - Web search via Tavily for current information
+        - ArXiv search for academic papers
+        - Vector similarity search for related questions
         """
     )

     )

 if __name__ == "__main__":
+    print("\n" + "-"*30 + " Enhanced LangGraph Agent Starting " + "-"*30)
     demo.launch(debug=True, share=False)
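The feature and tool lists above describe capabilities that live inside `build_graph` in `veryfinal.py`, which this diff does not show. As a rough, hypothetical sketch only (not the project's actual implementation), tools of that kind are typically declared with LangChain's `@tool` decorator and bound to a chat model before being wired into a LangGraph node:

```python
from langchain_core.tools import tool


@tool
def multiply(a: float, b: float) -> float:
    """Multiply two numbers."""
    return a * b


@tool
def add(a: float, b: float) -> float:
    """Add two numbers."""
    return a + b


# Hypothetical wiring: any chat model exposing bind_tools() can use these;
# the bound model would then back the agent node built in veryfinal.build_graph.
# llm = ChatGroq(model="llama-3.3-70b-versatile")   # assumption, not from the diff
# llm_with_tools = llm.bind_tools([multiply, add])
```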