josondev committed (verified)
Commit 80c837c · 1 Parent(s): e292008

Update app.py

Files changed (1):
  1. app.py +33 -111
app.py CHANGED
@@ -1,79 +1,36 @@
-""" Multi-LLM Agent Evaluation Runner"""
 import os
-import inspect
 import gradio as gr
 import requests
 import pandas as pd
 from langchain_core.messages import HumanMessage
 from veryfinal import build_graph

-# --- Constants ---
 DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"

-# --- Enhanced Agent Definition ---
-class EnhancedMultiLLMAgent:
-    """A multi-provider LangGraph agent supporting Groq, DeepSeek, and Baidu."""
+class BasicAgent:
+    """A langgraph agent."""
     def __init__(self):
-        print("Enhanced Multi-LLM Agent initialized.")
-        try:
-            self.graph = build_graph(provider="groq") # Using Groq as default
-            print("Multi-LLM Graph built successfully.")
-        except Exception as e:
-            print(f"Error building graph: {e}")
-            self.graph = None
+        print("BasicAgent initialized.")
+        self.graph = build_graph()

     def __call__(self, question: str) -> str:
-        print(f"Agent received question: {question}")
-
-        if self.graph is None:
-            return "Error: Agent not properly initialized"
-
-        # Create complete state structure that matches EnhancedAgentState
+        print(f"Agent received question (first 50 chars): {question[:50]}...")
+        # Always pass the full state expected by the graph
         state = {
             "messages": [HumanMessage(content=question)],
-            "query": question, # This was the critical missing field
+            "query": question,
             "agent_type": "",
             "final_answer": "",
             "perf": {},
             "agno_resp": ""
         }
-        config = {"configurable": {"thread_id": f"eval_{hash(question)}"}}
-
-        try:
-            result = self.graph.invoke(state, config)
-
-            # Handle different response formats
-            if isinstance(result, dict):
-                if 'messages' in result and result['messages']:
-                    answer = result['messages'][-1].content
-                elif 'final_answer' in result:
-                    answer = result['final_answer']
-                else:
-                    answer = str(result)
-            else:
-                answer = str(result)
-
-            # Extract final answer if present
-            if "FINAL ANSWER:" in answer:
-                return answer.split("FINAL ANSWER:")[-1].strip()
-            else:
-                return answer.strip()
-
-        except Exception as e:
-            error_msg = f"Error: {str(e)}"
-            print(error_msg)
-            return error_msg
+        result = self.graph.invoke(state)
+        return result.get("final_answer", "")

 def run_and_submit_all(profile: gr.OAuthProfile | None):
-    """
-    Fetches all questions, runs the Enhanced Multi-LLM Agent on them,
-    submits all answers, and displays the results.
-    """
-    # --- Determine HF Space Runtime URL and Repo URL ---
-    space_id = os.getenv("SPACE_ID") # Get the SPACE_ID for sending link to the code
-
+    space_id = os.getenv("SPACE_ID")
     if profile:
-        username= f"{profile.username}"
+        username = f"{profile.username}"
         print(f"User logged in: {username}")
     else:
         print("User not logged in.")
@@ -83,78 +40,52 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
     questions_url = f"{api_url}/questions"
     submit_url = f"{api_url}/submit"

-    # 1. Instantiate Agent
     try:
-        agent = EnhancedMultiLLMAgent()
-        if agent.graph is None:
-            return "Error: Failed to initialize agent properly", None
+        agent = BasicAgent()
     except Exception as e:
         print(f"Error instantiating agent: {e}")
         return f"Error initializing agent: {e}", None
-
-    agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main" if space_id else "No space ID available"
-    print(f"Agent code URL: {agent_code}")
+    agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
+    print(agent_code)

-    # 2. Fetch Questions
     print(f"Fetching questions from: {questions_url}")
     try:
         response = requests.get(questions_url, timeout=15)
         response.raise_for_status()
         questions_data = response.json()
         if not questions_data:
-            print("Fetched questions list is empty.")
-            return "Fetched questions list is empty or invalid format.", None
+            print("Fetched questions list is empty.")
+            return "Fetched questions list is empty or invalid format.", None
         print(f"Fetched {len(questions_data)} questions.")
-    except requests.exceptions.RequestException as e:
+    except Exception as e:
         print(f"Error fetching questions: {e}")
         return f"Error fetching questions: {e}", None
-    except Exception as e:
-        print(f"An unexpected error occurred fetching questions: {e}")
-        return f"An unexpected error occurred fetching questions: {e}", None

-    # 3. Run your Agent
     results_log = []
     answers_payload = []
-    print(f"Running Enhanced Multi-LLM agent on {len(questions_data)} questions...")
-
-    for i, item in enumerate(questions_data):
+    print(f"Running agent on {len(questions_data)} questions...")
+    for item in questions_data:
         task_id = item.get("task_id")
         question_text = item.get("question")
-
         if not task_id or question_text is None:
             print(f"Skipping item with missing task_id or question: {item}")
             continue
-
-        print(f"Processing question {i+1}/{len(questions_data)}: {task_id}")
-
         try:
             submitted_answer = agent(question_text)
             answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
-            results_log.append({
-                "Task ID": task_id,
-                "Question": question_text[:100] + "..." if len(question_text) > 100 else question_text,
-                "Submitted Answer": submitted_answer[:200] + "..." if len(submitted_answer) > 200 else submitted_answer
-            })
+            results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer})
         except Exception as e:
-            error_msg = f"AGENT ERROR: {e}"
-            print(f"Error running agent on task {task_id}: {e}")
-            answers_payload.append({"task_id": task_id, "submitted_answer": error_msg})
-            results_log.append({
-                "Task ID": task_id,
-                "Question": question_text[:100] + "..." if len(question_text) > 100 else question_text,
-                "Submitted Answer": error_msg
-            })
+            print(f"Error running agent on task {task_id}: {e}")
+            results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": f"AGENT ERROR: {e}"})

     if not answers_payload:
         print("Agent did not produce any answers to submit.")
         return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)

-    # 4. Prepare Submission
     submission_data = {"username": username.strip(), "agent_code": agent_code, "answers": answers_payload}
-    status_update = f"Enhanced Multi-LLM Agent finished. Submitting {len(answers_payload)} answers for user '{username}'..."
+    status_update = f"Agent finished. Submitting {len(answers_payload)} answers for user '{username}'..."
     print(status_update)

-    # 5. Submit
     print(f"Submitting {len(answers_payload)} answers to: {submit_url}")
     try:
         response = requests.post(submit_url, json=submission_data, timeout=60)
@@ -176,33 +107,24 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
     results_df = pd.DataFrame(results_log)
     return status_message, results_df

-# --- Build Gradio Interface using Blocks ---
 with gr.Blocks() as demo:
-    gr.Markdown("# Enhanced Multi-LLM Agent Evaluation Runner")
+    gr.Markdown("# Basic Agent Evaluation Runner")
     gr.Markdown(
         """
         **Instructions:**
-        1. Log in to your Hugging Face account using the button below.
-        2. Click 'Run Evaluation & Submit All Answers' to fetch questions, run your agent, submit answers, and see the score.
-
-        **Enhanced Agent Features:**
-        - **Multi-LLM Support**: Groq, DeepSeek, and Baidu ERNIE
-        - **Intelligent Routing**: Automatically selects best provider based on query
-        - **Mathematical Tools**: Add, subtract, multiply, divide, modulus operations
-        - **Web Search**: Tavily and Wikipedia integration
-        - **Error Handling**: Robust fallback mechanisms
-        - **Rate Limiting**: Optimized for free tier usage
-
-        **Supported Models:**
-        - **Groq**: Llama 3.1 70B Versatile (fast inference)
-        - **DeepSeek**: DeepSeek Chat (reasoning-focused)
-        - **Baidu**: ERNIE (Chinese language optimized)
+        1. Please clone this space, then modify the code to define your agent's logic, the tools, the necessary packages, etc ...
+        2. Log in to your Hugging Face account using the button below. This uses your HF username for submission.
+        3. Click 'Run Evaluation & Submit All Answers' to fetch questions, run your agent, submit answers, and see the score.
+        ---
+        **Disclaimers:**
+        Once clicking on the "submit button, it can take quite some time ( this is the time for the agent to go through all the questions).
+        This space provides a basic setup and is intentionally sub-optimal to encourage you to develop your own, more robust solution. For instance for the delay process of the submit button, a solution could be to cache the answers and submit in a seperate action or even to answer the questions in async.
         """
     )

     gr.LoginButton()

-    run_button = gr.Button("Run Evaluation & Submit All Answers", variant="primary")
+    run_button = gr.Button("Run Evaluation & Submit All Answers")

     status_output = gr.Textbox(label="Run Status / Submission Result", lines=5, interactive=False)
     results_table = gr.DataFrame(label="Questions and Agent Answers", wrap=True)
@@ -213,5 +135,5 @@ with gr.Blocks() as demo:
     )

 if __name__ == "__main__":
-    print("\n" + "-"*30 + " App Starting " + "-"*30)
-    print("\n" + "-"*30 + " Enhanced Multi-LLM Agent Starting " + "-"*30)
+    print("\n" + "-"*30 + " App Starting " + "-"*30)
     demo.launch(debug=True, share=False)
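
For reference, the full state dict that BasicAgent.__call__ passes to graph.invoke mirrors the graph's state schema in veryfinal (the removed code's comments call it EnhancedAgentState). veryfinal.py is not part of this commit, so the sketch below is an assumption inferred from the keys used in the diff, not the actual definition; the types and the add_messages reducer are guesses.

# Assumed shape of the graph state in veryfinal.py; field names come from the dict
# built in BasicAgent.__call__ above, everything else is a hypothetical sketch.
from typing import Annotated, Any, Dict, List, TypedDict
from langchain_core.messages import AnyMessage
from langgraph.graph.message import add_messages

class EnhancedAgentState(TypedDict):
    messages: Annotated[List[AnyMessage], add_messages]  # conversation history
    query: str            # raw question text (noted as critical in the old code's comments)
    agent_type: str       # which provider/route handled the query
    final_answer: str     # what __call__ reads back via result.get("final_answer", "")
    perf: Dict[str, Any]  # timing / performance metadata
    agno_resp: str        # auxiliary response field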
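run_and_submit_all assembles its submission from the answers_payload entries collected in the loop above. Purely as an illustration (the values below are placeholders, not from the source), the JSON it POSTs to {DEFAULT_API_URL}/submit has this shape:

# Placeholder values; only the structure is taken from the code in the diff.
submission_data = {
    "username": "your-hf-username",
    "agent_code": "https://huggingface.co/spaces/<space_id>/tree/main",
    "answers": [
        {"task_id": "task-001", "submitted_answer": "example answer 1"},
        {"task_id": "task-002", "submitted_answer": "example answer 2"},
    ],
}
# response = requests.post(f"{DEFAULT_API_URL}/submit", json=submission_data, timeout=60)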
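The disclaimer added to the UI text suggests caching the answers and submitting them in a separate action, or answering the questions asynchronously. A minimal sketch of that idea, assuming the synchronous agent and questions_data list used above; the helper name and cache path are hypothetical and not part of this commit:

import asyncio
import json

async def answer_all_async(agent, questions_data, cache_path="answers_cache.json"):
    # Run the synchronous agent in worker threads so questions are answered concurrently.
    loop = asyncio.get_running_loop()

    async def answer_one(item):
        answer = await loop.run_in_executor(None, agent, item["question"])
        return {"task_id": item["task_id"], "submitted_answer": answer}

    answers = await asyncio.gather(*(answer_one(item) for item in questions_data))

    # Cache locally so submission can happen later as a separate action.
    with open(cache_path, "w") as f:
        json.dump(answers, f)
    return answers

The cached file could then be loaded and POSTed to the /submit endpoint from a second button handler, keeping the long-running answering step separate from submission.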