sirine1712 committed on
Commit
f342a23
·
verified ·
1 Parent(s): 8cc1f15

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +191 -122
app.py CHANGED
@@ -2,152 +2,221 @@ import os
2
  import gradio as gr
3
  import requests
4
  import pandas as pd
5
- import math
6
 
7
- from smolagents import ToolCallingAgent, tool
8
- from duckduckgo_search import DDGS
9
- from openai import OpenAI
10
 
11
- # Load OpenAI API key
12
- client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
13
 
14
- # ------------------------
15
- # Define Tools
16
- # ------------------------
 
 
17
 
18
@tool
def web_search(query: str) -> str:
    """Search the web using DuckDuckGo.

    Args:
        query: The search query to look up.

    Returns:
        A summary of the top web results, "No results found." when the
        search comes back empty, or a "Search error: ..." message on failure.
    """
    try:
        with DDGS() as ddgs:
            # Materialize the hits before testing for emptiness: if the
            # library hands back a lazy iterator (older releases are believed
            # to), it is always truthy and the empty check would never fire.
            results = list(ddgs.text(query, max_results=3) or [])

        if not results:
            return "No results found."

        # .get() keeps one malformed hit from turning the whole search into
        # an error message.
        return "\n\n".join(
            f"Title: {r.get('title', '')}\n"
            f"Snippet: {r.get('body', '')}\n"
            f"URL: {r.get('href', '')}"
            for r in results
        )
    except Exception as e:
        # Tools report failures as text so the calling agent can react to them.
        return f"Search error: {str(e)}"
39
-
40
@tool
def calculate(expression: str) -> str:
    """Evaluate a mathematical expression.

    Args:
        expression: The math expression to evaluate (e.g. '2 + 3 * 5').

    Returns:
        Result of the calculation.
    """
    # Local imports keep this block self-contained.
    import ast
    import operator

    # Same namespace the original exposed: everything public in ``math``.
    safe_math = {k: v for k, v in math.__dict__.items() if not k.startswith("__")}

    binary_ops = {
        ast.Add: operator.add,
        ast.Sub: operator.sub,
        ast.Mult: operator.mul,
        ast.Div: operator.truediv,
        ast.FloorDiv: operator.floordiv,
        ast.Mod: operator.mod,
        ast.Pow: operator.pow,
    }
    unary_ops = {ast.UAdd: operator.pos, ast.USub: operator.neg}

    def evaluate(node):
        """Recursively evaluate a whitelisted subset of expression nodes."""
        if isinstance(node, ast.Constant):
            if isinstance(node.value, (int, float, complex)):
                return node.value
            raise ValueError(f"unsupported constant: {node.value!r}")
        if isinstance(node, ast.Name):
            if node.id in safe_math:
                return safe_math[node.id]
            raise NameError(f"name '{node.id}' is not defined")
        if isinstance(node, ast.BinOp) and type(node.op) in binary_ops:
            return binary_ops[type(node.op)](evaluate(node.left), evaluate(node.right))
        if isinstance(node, ast.UnaryOp) and type(node.op) in unary_ops:
            return unary_ops[type(node.op)](evaluate(node.operand))
        if isinstance(node, ast.Call) and isinstance(node.func, ast.Name):
            func = evaluate(node.func)
            if not callable(func):
                raise TypeError(f"'{node.func.id}' is not callable")
            args = [evaluate(arg) for arg in node.args]
            kwargs = {kw.arg: evaluate(kw.value) for kw in node.keywords if kw.arg}
            return func(*args, **kwargs)
        if isinstance(node, (ast.List, ast.Tuple)):
            # Allows calls such as fsum([1, 2, 3]).
            return [evaluate(item) for item in node.elts]
        raise ValueError(f"unsupported expression: {type(node).__name__}")

    try:
        # SECURITY: the original used eval() with builtins set to None, which
        # is not a sandbox (attribute access on literals can still reach
        # arbitrary objects). Walking the AST accepts only arithmetic and
        # calls into ``math``.
        tree = ast.parse(expression.strip(), mode="eval")
        return str(evaluate(tree.body))
    except Exception as e:
        return f"Calculation error: {str(e)}"
56
-
57
- # ------------------------
58
- # Define Agent
59
- # ------------------------
60
-
61
class GAIAAgent:
    """Callable wrapper around a smolagents ``ToolCallingAgent``.

    The agent is given the ``web_search`` and ``calculate`` tools and is
    invoked as ``agent(question)``, which always returns a string.
    """

    def __init__(self, model_id: str = "gpt-4o-mini"):
        """Build the underlying tool-calling agent.

        Args:
            model_id: OpenAI model name used for the chat completions.
                NOTE(review): the default is a placeholder choice; confirm
                which model this Space is meant to use.
        """
        # The original passed ``client.chat.completions`` as the model. That
        # is an OpenAI SDK resource, not a smolagents model object, so the
        # agent could not call it. Imported locally so this block does not
        # depend on the file-level import list.
        from smolagents import OpenAIServerModel

        model = OpenAIServerModel(
            model_id=model_id,
            api_key=os.getenv("OPENAI_API_KEY"),
        )
        self.agent = ToolCallingAgent(
            name="GAIA_Agent",
            description=(
                "You are an AI assistant that answers questions using tools:\n"
                "- Use 'web_search' for looking up facts and recent information.\n"
                "- Use 'calculate' for evaluating math expressions.\n"
                "Be accurate and concise."
            ),
            tools=[web_search, calculate],
            model=model,
        )

    def __call__(self, question: str) -> str:
        """Run the agent on ``question``.

        Returns:
            The agent's answer as a string, or "Agent error: ..." if the run
            raised.
        """
        try:
            return str(self.agent.run(question))
        except Exception as e:
            return f"Agent error: {str(e)}"
79
 
80
- # ------------------------
81
- # Gradio App Logic
82
- # ------------------------
83
 
84
def run_agent_and_submit(profile: gr.OAuthProfile | None):
    """Fetch the questions, answer each with the agent, and submit the answers.

    Args:
        profile: OAuth profile of the logged-in user, or ``None`` when nobody
            is logged in.

    Returns:
        A ``(status_text, results)`` pair. ``results`` is a DataFrame of
        task id / question / answer rows, or ``None`` when the run stopped
        before any question was processed.
    """
    if not profile:
        return "⚠️ Please log in to Hugging Face.", None

    try:
        agent = GAIAAgent()
        response = requests.get("https://agents-course-unit4-scoring.hf.space/questions", timeout=20)
        # Without this, an HTTP error body would be parsed as if it were the
        # question list.
        response.raise_for_status()
        questions = response.json()
        if not isinstance(questions, list):
            raise ValueError("unexpected response format (expected a list of questions)")
    except Exception as e:
        return f"❌ Error fetching questions: {e}", None

    results = []
    answers = []

    for q in questions:
        task_id = q.get("task_id")
        question_text = q.get("question")
        if not task_id or not question_text:
            continue
        try:
            answer = agent(question_text)
        except Exception as e:
            answer = f"Agent error: {e}"

        # The submitted answer is capped at 1000 characters; the results
        # table keeps the full text.
        answers.append({
            "task_id": task_id,
            "submitted_answer": answer[:1000],
        })
        results.append({
            "Task ID": task_id,
            "Question": question_text,
            "Answer": answer,
        })

    # Submit answers
    try:
        submit_url = "https://agents-course-unit4-scoring.hf.space/submit"
        payload = {
            "username": profile.username,
            "agent_code": f"https://huggingface.co/spaces/{os.getenv('SPACE_ID')}/tree/main",
            "answers": answers,
        }
        submit_resp = requests.post(submit_url, json=payload, timeout=60)
        # A rejected submission must not be reported as "Submitted".
        submit_resp.raise_for_status()
        result_data = submit_resp.json()
        summary = (
            f"✅ Submitted {len(answers)} answers\n"
            f"📊 Score: {result_data.get('score', 'N/A')}%\n"
            f"✔️ Correct: {result_data.get('correct_count', '?')}/{len(answers)}"
        )
    except Exception as e:
        summary = f" Submission error: {e}"

    return summary, pd.DataFrame(results)
137
-
138
- # ------------------------
139
- # Gradio Interface
140
- # ------------------------
141
 
 
142
# Page layout: title, short description, login, a trigger button, and two
# output widgets that receive the (status, results) pair.
with gr.Blocks() as demo:
    gr.Markdown("# 🤖 GAIA Tool Agent")
    gr.Markdown("This agent answers GAIA benchmark questions using tool-calling with search and math.")

    gr.LoginButton()
    run_btn = gr.Button("🔍 Run Agent & Submit")
    status = gr.Textbox(label="Status", lines=4)
    results_df = gr.DataFrame(label="Results")

    # No explicit inputs: the handler's only parameter is the OAuth profile.
    run_btn.click(
        fn=run_agent_and_submit,
        outputs=[status, results_df],
    )


if __name__ == "__main__":
    demo.launch()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2
  import gradio as gr
3
  import requests
4
  import pandas as pd
5
+ from dotenv import load_dotenv
6
 
7
+ from myagent import BasicAgent # Unused basic single agent
8
+ from multiagents import MultiAgent
 
9
 
10
+ from phoenix.otel import register
11
+ from openinference.instrumentation.smolagents import SmolagentsInstrumentor
12
 
13
# Load .env before anything reads the environment. The original called
# load_dotenv() only after SPACE_HOST had been read and the tracer had been
# registered, so values defined in .env were not visible to either step.
load_dotenv()

# Use the SPACE_HOST variable to tell a Hugging Face Space from a local run;
# tracing instrumentation is registered only for local runs.
space_host_startup = os.getenv("SPACE_HOST")
if not space_host_startup:
    register()
    SmolagentsInstrumentor().instrument()

# --- Constants ---
DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"

# Default number of questions to run.
max_questions = 20
 
25
 
26
def _question_limit(nb_questions):
    """Convert the UI-supplied question count into a slice bound.

    Returns ``None`` (no limit) when the value is missing or not a finite
    number; negative values are clamped to 0.
    """
    if nb_questions is None:
        return None
    try:
        return max(int(nb_questions), 0)
    except (TypeError, ValueError, OverflowError):
        return None


def run_and_submit_all(nb_questions: int, profile: gr.OAuthProfile | None):
    """
    Fetches all questions, runs my Agent on them, submits all answers,
    and displays the results.

    Args:
        nb_questions: Maximum number of questions to run. The UI number field
            may deliver a float or ``None``, so the value is coerced before
            it is used as a slice bound.
        profile: OAuth profile of the logged-in user, or ``None``.

    Returns:
        A ``(status_text, results)`` pair. ``results`` is a DataFrame of the
        per-question log, or ``None`` when the run stopped before the agent
        processed any question.
    """
    # --- Determine HF Space Runtime URL and Repo URL ---
    space_id = os.getenv("SPACE_ID")  # used to build the link to the code

    if not profile:
        print("User not logged in.")
        return "Please Login to Hugging Face with the button.", None
    username = f"{profile.username}"
    print(f"User logged in: {username}")

    api_url = DEFAULT_API_URL
    questions_url = f"{api_url}/questions"
    file_url = f"{api_url}/files"
    submit_url = f"{api_url}/submit"

    # 1. Instantiate Agent
    try:
        agent = MultiAgent()
    except Exception as e:
        print(f"Error instantiating agent: {e}")
        return f"Error initializing agent: {e}", None
    # When running as a Hugging Face Space, this link points to the codebase
    # (useful for others, so please keep it public).
    agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
    print(agent_code)

    # 2. Fetch Questions
    print(f"Fetching questions from: {questions_url}")
    try:
        response = requests.get(questions_url, timeout=15)
        response.raise_for_status()
        questions_data = response.json()
        if not questions_data:
            print("Fetched questions list is empty.")
            return "Fetched questions list is empty or invalid format.", None
        print(f"Fetched {len(questions_data)} questions.")
    except requests.exceptions.JSONDecodeError as e:
        # Must come before RequestException: JSONDecodeError is a subclass of
        # it, so in the original order this handler was unreachable.
        print(f"Error decoding JSON response from questions endpoint: {e}")
        print(f"Response text: {response.text[:500]}")
        return f"Error decoding server response for questions: {e}", None
    except requests.exceptions.RequestException as e:
        print(f"Error fetching questions: {e}")
        return f"Error fetching questions: {e}", None
    except Exception as e:
        print(f"An unexpected error occurred fetching questions: {e}")
        return f"An unexpected error occurred fetching questions: {e}", None

    # 3. Run your Agent
    results_log = []
    answers_payload = []

    # For testing, keep only some questions. A float count would raise
    # TypeError when used directly as a slice index, hence the coercion.
    questions_data = questions_data[:_question_limit(nb_questions)]

    print(f"Running agent on {len(questions_data)} questions...")
    for item in questions_data:
        task_id = item.get("task_id")
        question_text = item.get("question")
        file_name = item.get("file_name")
        if not task_id or question_text is None:
            print(f"Skipping item with missing task_id or question: {item}")
            continue

        # Questions with an attachment get the download URL appended so the
        # agent can fetch the file itself.
        agent_question = question_text
        if file_name:
            agent_question += f"\n\nFile URL: {file_url}/{task_id}"

        try:
            submitted_answer = agent(agent_question)
        except Exception as e:
            # A failed question is logged but not submitted.
            print(f"Error running agent on task {task_id}: {e}")
            results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": f"AGENT ERROR: {e}"})
            continue

        answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
        results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer})

    if not answers_payload:
        print("Agent did not produce any answers to submit.")
        return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)

    # 4. Prepare Submission
    submission_data = {"username": username.strip(), "agent_code": agent_code, "answers": answers_payload}
    status_update = f"Agent finished. Submitting {len(answers_payload)} answers for user '{username}'..."
    print(status_update)

    # 5. Submit
    print(f"Submitting {len(answers_payload)} answers to: {submit_url}")
    try:
        response = requests.post(submit_url, json=submission_data, timeout=60)
        response.raise_for_status()
        result_data = response.json()
        status_message = (
            f"Submission Successful!\n"
            f"User: {result_data.get('username')}\n"
            f"Overall Score: {result_data.get('score', 'N/A')}% "
            f"({result_data.get('correct_count', '?')}/{result_data.get('total_attempted', '?')} correct)\n"
            f"Message: {result_data.get('message', 'No message received.')}"
        )
        print("Submission successful.")
    except requests.exceptions.HTTPError as e:
        error_detail = f"Server responded with status {e.response.status_code}."
        try:
            error_json = e.response.json()
            error_detail += f" Detail: {error_json.get('detail', e.response.text)}"
        except (requests.exceptions.JSONDecodeError, AttributeError):
            # The body was not JSON, or was JSON but not an object.
            error_detail += f" Response: {e.response.text[:500]}"
        status_message = f"Submission Failed: {error_detail}"
        print(status_message)
    except requests.exceptions.Timeout:
        status_message = "Submission Failed: The request timed out."
        print(status_message)
    except requests.exceptions.JSONDecodeError as e:
        # Listed before RequestException so an unparseable success body is
        # not reported as a network error.
        status_message = f"Submission Failed: Could not decode server response - {e}"
        print(status_message)
    except requests.exceptions.RequestException as e:
        status_message = f"Submission Failed: Network error - {e}"
        print(status_message)
    except Exception as e:
        status_message = f"An unexpected error occurred during submission: {e}"
        print(status_message)

    # Every outcome returns the same per-question log alongside its status.
    return status_message, pd.DataFrame(results_log)
160
 
 
 
 
 
 
161
 
162
+ # --- Build Gradio Interface using Blocks ---
163
with gr.Blocks() as demo:
    gr.Markdown("# Basic Agent Evaluation Runner")
    gr.Markdown("""
        *Special Considerations*: Due to limitation issues, this code depend on local search engine and local speech to text model. Both run through docker, see the readme file."
        One can achieve similar result, by using Google search API and OpenAI Whisper API.
        """)
    gr.Markdown(
        """
        **Instructions:**

        1. Please clone this space, then modify the code to define your agent's logic, the tools, the necessary packages, etc ...
        2. Log in to your Hugging Face account using the button below. This uses your HF username for submission.
        3. Click 'Run Evaluation & Submit All Answers' to fetch questions, run your agent, submit answers, and see the score.

        ---
        **Disclaimers:**
        Once clicking on the "submit button, it can take quite some time ( this is the time for the agent to go through all the questions).
        This space provides a basic setup and is intentionally sub-optimal to encourage you to develop your own, more robust solution. For instance for the delay process of the submit button, a solution could be to cache the answers and submit in a seperate action or even to answer the questions in async.
        """
    )

    gr.LoginButton()

    # precision=0 makes the component hand the callback an int; with the
    # default precision the value is a float, which cannot be used as the
    # slice bound in run_and_submit_all. The default comes from the
    # module-level max_questions constant instead of a second literal 20.
    nb_questions = gr.Number(value=max_questions, precision=0)

    run_button = gr.Button("Run Evaluation & Submit All Answers")

    status_output = gr.Textbox(label="Run Status / Submission Result", lines=5, interactive=False)
    # Removed max_rows=10 from DataFrame constructor
    results_table = gr.DataFrame(label="Questions and Agent Answers", wrap=True)

    # The OAuth profile argument is not listed in `inputs`.
    run_button.click(
        fn=run_and_submit_all,
        inputs=[nb_questions],
        outputs=[status_output, results_table]
    )
199
 
200
if __name__ == "__main__":
    print("\n" + "-"*30 + " App Starting " + "-"*30)
    # Check for SPACE_HOST and SPACE_ID at startup for information
    space_host_startup = os.getenv("SPACE_HOST")
    space_id_startup = os.getenv("SPACE_ID")  # Get SPACE_ID at startup

    if space_host_startup:
        print(f"✅ SPACE_HOST found: {space_host_startup}")
        # NOTE(review): SPACE_HOST is expected to already be the full host
        # name ending in ".hf.space"; the suffix is appended only when it is
        # missing so the printed URL is not doubled.
        runtime_host = space_host_startup
        if not runtime_host.endswith(".hf.space"):
            runtime_host = f"{runtime_host}.hf.space"
        print(f" Runtime URL should be: https://{runtime_host}")
    else:
        print("ℹ️ SPACE_HOST environment variable not found (running locally?).")

    if space_id_startup:  # Print repo URLs if SPACE_ID is found
        print(f"✅ SPACE_ID found: {space_id_startup}")
        print(f" Repo URL: https://huggingface.co/spaces/{space_id_startup}")
        print(f" Repo Tree URL: https://huggingface.co/spaces/{space_id_startup}/tree/main")
    else:
        print("ℹ️ SPACE_ID environment variable not found (running locally?). Repo URL cannot be determined.")

    print("-"*(60 + len(" App Starting ")) + "\n")

    print("Launching Gradio Interface for Basic Agent Evaluation...")
    demo.launch(debug=True, share=False)