josondev committed on
Commit
15b6891
·
verified ·
1 Parent(s): 86c8869

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +49 -43
app.py CHANGED
@@ -1,4 +1,4 @@
1
- """ Enhanced Multi-LLM Agent Evaluation Runner with Agno Integration"""
2
  import os
3
  import gradio as gr
4
  import requests
@@ -11,56 +11,70 @@ DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
11
 
12
  # --- Enhanced Agent Definition ---
13
  class EnhancedMultiLLMAgent:
14
- """A multi-provider LangGraph agent with Agno-style reasoning capabilities."""
15
  def __init__(self):
16
- print("Enhanced Multi-LLM Agent with Agno Integration initialized.")
17
  try:
18
  self.graph = build_graph(provider="groq")
19
- print("Enhanced Multi-LLM Graph built successfully.")
20
  except Exception as e:
21
  print(f"Error building graph: {e}")
22
  self.graph = None
23
 
24
  def __call__(self, question: str) -> str:
25
- print(f"Agent received question (first 50 chars): {question[:50]}...")
26
 
27
  if self.graph is None:
28
  return "Error: Agent not properly initialized"
29
 
30
- # CRITICAL FIX: Always pass the complete state expected by the graph
31
  state = {
32
  "messages": [HumanMessage(content=question)],
33
- "query": question, # This was the critical missing field
34
  "agent_type": "",
35
  "final_answer": "",
36
  "perf": {},
37
- "agno_resp": "",
38
- "tools_used": [],
39
- "reasoning": "",
40
- "confidence": ""
41
  }
42
- # CRITICAL FIX: Always provide the required config with thread_id
43
  config = {"configurable": {"thread_id": f"eval_{hash(question)}"}}
44
 
45
  try:
46
  result = self.graph.invoke(state, config)
47
 
48
- # Handle different response formats
49
  if isinstance(result, dict):
50
- if 'messages' in result and result['messages']:
51
- answer = result['messages'][-1].content
52
- elif 'final_answer' in result:
53
  answer = result['final_answer']
 
 
 
 
 
 
 
54
  else:
55
  answer = str(result)
56
  else:
57
  answer = str(result)
58
 
 
 
 
 
 
 
 
59
  # Extract final answer if present
60
  if "FINAL ANSWER:" in answer:
61
- return answer.split("FINAL ANSWER:")[-1].strip()
62
- else:
63
- return answer.strip()
 
 
 
 
64
 
65
  except Exception as e:
66
  error_msg = f"Error: {str(e)}"
@@ -68,10 +82,7 @@ class EnhancedMultiLLMAgent:
68
  return error_msg
69
 
70
  def run_and_submit_all(profile: gr.OAuthProfile | None):
71
- """
72
- Fetches all questions, runs the Enhanced Multi-LLM Agent on them,
73
- submits all answers, and displays the results.
74
- """
75
  space_id = os.getenv("SPACE_ID")
76
 
77
  if profile:
@@ -114,7 +125,7 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
114
  # 3. Run your Agent
115
  results_log = []
116
  answers_payload = []
117
- print(f"Running Enhanced Multi-LLM agent with Agno integration on {len(questions_data)} questions...")
118
 
119
  for i, item in enumerate(questions_data):
120
  task_id = item.get("task_id")
@@ -128,6 +139,11 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
128
 
129
  try:
130
  submitted_answer = agent(question_text)
 
 
 
 
 
131
  answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
132
  results_log.append({
133
  "Task ID": task_id,
@@ -175,36 +191,26 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
175
  results_df = pd.DataFrame(results_log)
176
  return status_message, results_df
177
 
178
- # --- Build Gradio Interface using Blocks ---
179
  with gr.Blocks() as demo:
180
- gr.Markdown("# Enhanced Multi-LLM Agent with Agno Integration")
181
  gr.Markdown(
182
  """
183
  **Instructions:**
184
  1. Log in to your Hugging Face account using the button below.
185
  2. Click 'Run Evaluation & Submit All Answers' to fetch questions, run your agent, submit answers, and see the score.
186
 
187
- **Enhanced Agent Features:**
188
- - **Multi-LLM Support**: Groq (Llama-3 8B/70B, DeepSeek), Google Gemini, NVIDIA NIM
189
- - **Agno Integration**: Systematic reasoning with step-by-step analysis
190
- - **Intelligent Routing**: Automatically selects best provider based on query complexity
191
- - **Enhanced Tools**: Mathematical operations, web search, Wikipedia integration
192
- - **Question-Answering**: Optimized for evaluation tasks with proper formatting
193
- - **Error Handling**: Robust fallback mechanisms and comprehensive logging
194
-
195
- **Routing Examples:**
196
- - Standard: "What is the capital of France?" → Llama-3 8B
197
- - Complex: "Analyze quantum computing principles" → Llama-3 70B
198
- - Search: "Find information about Mercedes Sosa" → Search-Enhanced
199
- - Agno: "agno llama-70: Systematic analysis of AI ethics" → Agno Llama-3 70B
200
- - Provider-specific: "google: Explain machine learning" → Google Gemini
201
  """
202
  )
203
 
204
  gr.LoginButton()
205
-
206
  run_button = gr.Button("Run Evaluation & Submit All Answers", variant="primary")
207
-
208
  status_output = gr.Textbox(label="Run Status / Submission Result", lines=5, interactive=False)
209
  results_table = gr.DataFrame(label="Questions and Agent Answers", wrap=True)
210
 
@@ -214,5 +220,5 @@ with gr.Blocks() as demo:
214
  )
215
 
216
  if __name__ == "__main__":
217
- print("\n" + "-"*30 + " Enhanced Multi-LLM Agent with Agno Starting " + "-"*30)
218
  demo.launch(debug=True, share=False)
 
1
+ """ Enhanced Multi-LLM Agent Evaluation Runner - CORRECTED VERSION"""
2
  import os
3
  import gradio as gr
4
  import requests
 
11
 
12
  # --- Enhanced Agent Definition ---
13
  class EnhancedMultiLLMAgent:
14
+ """A multi-provider LangGraph agent with proper response handling."""
15
  def __init__(self):
16
+ print("Enhanced Multi-LLM Agent initialized.")
17
  try:
18
  self.graph = build_graph(provider="groq")
19
+ print("Multi-LLM Graph built successfully.")
20
  except Exception as e:
21
  print(f"Error building graph: {e}")
22
  self.graph = None
23
 
24
  def __call__(self, question: str) -> str:
25
+ print(f"Agent received question: {question[:100]}...")
26
 
27
  if self.graph is None:
28
  return "Error: Agent not properly initialized"
29
 
30
+ # Create complete state structure
31
  state = {
32
  "messages": [HumanMessage(content=question)],
33
+ "query": question, # Critical: this must match the question
34
  "agent_type": "",
35
  "final_answer": "",
36
  "perf": {},
37
+ "agno_resp": ""
 
 
 
38
  }
39
+ # Always provide the required config with thread_id
40
  config = {"configurable": {"thread_id": f"eval_{hash(question)}"}}
41
 
42
  try:
43
  result = self.graph.invoke(state, config)
44
 
45
+ # CORRECTED: Proper response extraction
46
  if isinstance(result, dict):
47
+ # First try to get final_answer from the state
48
+ if 'final_answer' in result and result['final_answer']:
 
49
  answer = result['final_answer']
50
+ # Fallback to messages if final_answer is empty
51
+ elif 'messages' in result and result['messages']:
52
+ last_message = result['messages'][-1]
53
+ if hasattr(last_message, 'content'):
54
+ answer = last_message.content
55
+ else:
56
+ answer = str(last_message)
57
  else:
58
  answer = str(result)
59
  else:
60
  answer = str(result)
61
 
62
+ # Clean the answer
63
+ answer = answer.strip()
64
+
65
+ # CRITICAL FIX: Ensure we don't return the question as answer
66
+ if answer == question or answer.startswith(question):
67
+ return "Information not available"
68
+
69
  # Extract final answer if present
70
  if "FINAL ANSWER:" in answer:
71
+ answer = answer.split("FINAL ANSWER:")[-1].strip()
72
+
73
+ # Additional validation
74
+ if not answer or len(answer.strip()) == 0:
75
+ return "No answer generated"
76
+
77
+ return answer
78
 
79
  except Exception as e:
80
  error_msg = f"Error: {str(e)}"
 
82
  return error_msg
83
 
84
  def run_and_submit_all(profile: gr.OAuthProfile | None):
85
+ """Fetch questions, run agent, and submit answers."""
 
 
 
86
  space_id = os.getenv("SPACE_ID")
87
 
88
  if profile:
 
125
  # 3. Run your Agent
126
  results_log = []
127
  answers_payload = []
128
+ print(f"Running Enhanced Multi-LLM agent on {len(questions_data)} questions...")
129
 
130
  for i, item in enumerate(questions_data):
131
  task_id = item.get("task_id")
 
139
 
140
  try:
141
  submitted_answer = agent(question_text)
142
+
143
+ # Additional validation to prevent question repetition
144
+ if submitted_answer == question_text or submitted_answer.startswith(question_text):
145
+ submitted_answer = "Information not available"
146
+
147
  answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
148
  results_log.append({
149
  "Task ID": task_id,
 
191
  results_df = pd.DataFrame(results_log)
192
  return status_message, results_df
193
 
194
+ # --- Build Gradio Interface ---
195
  with gr.Blocks() as demo:
196
+ gr.Markdown("# Enhanced Multi-LLM Agent - CORRECTED VERSION")
197
  gr.Markdown(
198
  """
199
  **Instructions:**
200
  1. Log in to your Hugging Face account using the button below.
201
  2. Click 'Run Evaluation & Submit All Answers' to fetch questions, run your agent, submit answers, and see the score.
202
 
203
+ **FIXES APPLIED:**
204
+ - ✅ Proper response extraction from graph state
205
+ - ✅ Prevention of question repetition as answer
206
+ - ✅ Enhanced prompt engineering for better responses
207
+ - ✅ Improved error handling and validation
208
+ - ✅ Search-enhanced processing for information retrieval
 
 
 
 
 
 
 
 
209
  """
210
  )
211
 
212
  gr.LoginButton()
 
213
  run_button = gr.Button("Run Evaluation & Submit All Answers", variant="primary")
 
214
  status_output = gr.Textbox(label="Run Status / Submission Result", lines=5, interactive=False)
215
  results_table = gr.DataFrame(label="Questions and Agent Answers", wrap=True)
216
 
 
220
  )
221
 
222
  if __name__ == "__main__":
223
+ print("\n" + "-"*30 + " Enhanced Multi-LLM Agent CORRECTED Starting " + "-"*30)
224
  demo.launch(debug=True, share=False)