Sonu313131 committed on
Commit
c416910
·
verified ·
1 Parent(s): 0e37d38

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +111 -82
app.py CHANGED
@@ -1,142 +1,171 @@
1
  import os
2
  import gradio as gr
3
  import requests
 
4
  import pandas as pd
5
  import asyncio
6
- import json
 
7
  from huggingface_hub import login
8
- from smolagents import CodeAgent, InferenceClientModel, DuckDuckGoSearchTool
9
 
# --- Constants ---
# Base address of the scoring service, plus the two endpoints derived from it.
DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
QUESTIONS_URL = DEFAULT_API_URL + "/questions"
SUBMIT_URL = DEFAULT_API_URL + "/submit"

# --- Hugging Face Login ---
# Runs at import time; a missing HUGGINGFACEHUB_API_TOKEN raises KeyError here.
login(token=os.environ["HUGGINGFACEHUB_API_TOKEN"])

# --- Define Tools ---
# Single web-search tool instance handed to the agent.
search_tool = DuckDuckGoSearchTool()
20
 
# --- Main Async Function with Progress Logs ---
async def run_and_submit_all(profile: gr.OAuthProfile | None):
    """Answer every evaluation question with the agent and submit the results.

    Async generator used as the Gradio click handler. Every yielded value is
    a ``(status, table, log)`` tuple: ``status`` is the final status text (or
    ``None`` while tasks are still running), ``table`` is a DataFrame of the
    per-task answers (or ``None``), and ``log`` is the accumulated progress
    log.

    Args:
        profile: OAuth profile of the logged-in user, or ``None``. Without a
            profile the submission is sent under the username "unknown".
    """
    log_output = ""

    try:
        agent = CodeAgent(
            tools=[search_tool],
            # InferenceClientModel takes the checkpoint name as ``model_id``;
            # ``model=`` is not one of its named parameters.
            model=InferenceClientModel(model_id="mistralai/Magistral-Small-2506"),
            max_steps=5,
            verbosity_level=2,
        )
    except Exception as e:
        yield f"❌ Agent Initialization Error: {e}", None, log_output
        return

    space_id = os.getenv("SPACE_ID", "unknown")
    agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"

    try:
        response = requests.get(QUESTIONS_URL, timeout=15)
        response.raise_for_status()
        questions = response.json()
    except Exception as e:
        yield f"❌ Failed to fetch questions: {e}", None, log_output
        return

    # Anything other than a non-empty list cannot be iterated as tasks.
    if not isinstance(questions, list) or not questions:
        yield "⚠️ No questions received.", None, log_output
        return

    # The instruction prefix is the same for every task, so build it once.
    system_prompt = (
        "You are a general AI assistant. I will ask you a question. "
        "Report your thoughts, and finish your answer with the following template: "
        "FINAL ANSWER: [YOUR FINAL ANSWER]. YOUR FINAL ANSWER should be a number OR as few words as possible OR a comma separated list of numbers and/or strings.\n\n"
    )

    answers = []
    logs = []
    loop = asyncio.get_running_loop()

    for item in questions:
        if not isinstance(item, dict):
            continue
        task_id = item.get("task_id")
        question = item.get("question")
        if not task_id or not question:
            continue

        log_output += f"🔍 Solving Task ID: {task_id}...\n"
        yield None, None, log_output  # Live update

        try:
            full_prompt = system_prompt + f"Question: {question.strip()}"
            # The agent call is blocking, so run it in the default executor.
            result = await loop.run_in_executor(None, lambda: agent(full_prompt))

            if isinstance(result, dict) and "final_answer" in result:
                final_answer = str(result["final_answer"]).strip()
            elif isinstance(result, str):
                # Keep only what follows the last marker; the whole text
                # is kept when the marker is absent.
                final_answer = result.rpartition("FINAL ANSWER:")[2].strip()
            else:
                final_answer = str(result).strip()
        except Exception as e:
            # A failed task is still recorded (and submitted) as an error text.
            final_answer = f"AGENT ERROR: {e}"
            print(f"[ERROR] Task {task_id} failed: {e}")

        answers.append({"task_id": task_id, "model_answer": final_answer})
        logs.append({"Task ID": task_id, "Question": question, "Submitted Answer": final_answer})
        log_output += f"✅ Done: {task_id} — Answer: {final_answer[:60]}\n"
        yield None, None, log_output  # Live update

    valid_answers = [
        a for a in answers
        if isinstance(a["task_id"], str) and isinstance(a["model_answer"], str)
    ]

    if not valid_answers:
        yield "❌ Agent produced no valid answers.", pd.DataFrame(logs), log_output
        return

    submission = {
        "username": profile.username if profile else "unknown",
        "agent_code": agent_code,
        "answers": valid_answers,
    }

    print("[DEBUG] Submitting:\n", json.dumps(submission, indent=2))

    try:
        resp = requests.post(SUBMIT_URL, json=submission, timeout=60)
        resp.raise_for_status()
        result_data = resp.json()
    except Exception as e:
        yield f"❌ Submission failed: {e}", pd.DataFrame(logs), log_output
        return

    summary = (
        f"✅ Submission Successful\n"
        f"User: {result_data.get('username')}\n"
        f"Score: {result_data.get('score', 'N/A')}% "
        f"({result_data.get('correct_count')}/{result_data.get('total_attempted')})\n"
        f"Message: {result_data.get('message', 'No message.')}"
    )
    yield summary, pd.DataFrame(logs), log_output
 
 
121
 
# --- Gradio UI ---
# Page layout: title, short instructions, login button, run button and the
# three outputs that run_and_submit_all fills in.
with gr.Blocks() as demo:
    gr.Markdown("# 🧠 GAIA Agent Evaluation Interface")
    gr.Markdown("""
    - Log in with your Hugging Face account.
    - Click the button below to run the agent and submit the answers.
    - Watch the log to see which question is being solved in real-time.
    """)

    gr.LoginButton()
    run_button = gr.Button("🚀 Run Evaluation & Submit")
    status = gr.Textbox(label="Final Status", lines=6)
    table = gr.DataFrame(label="Answer Log")
    progress_log = gr.Textbox(label="Live Progress Log", lines=10, interactive=False)

    # Outputs are listed in the same order as the tuples the handler yields.
    run_button.click(fn=run_and_submit_all, outputs=[status, table, progress_log])
 
 
 
 
 
138
 
# --- Launch ---
if __name__ == "__main__":
    # Announce startup on stdout, then start the Gradio app with debug on.
    print("Launching Agent Space...")
    demo.launch(debug=True)
 
 
 
 
 
 
 
 
 
 
1
  import os
2
  import gradio as gr
3
  import requests
4
+ import inspect
5
  import pandas as pd
6
  import asyncio
7
+ from smolagents import ToolCallingAgent, InferenceClientModel, HfApiModel
8
+ from smolagents import DuckDuckGoSearchTool, Tool, CodeAgent
9
  from huggingface_hub import login
 
10
 
 
# Base address of the scoring service; endpoint paths are appended where used.
DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"

# Runs at import time; a missing HUGGINGFACEHUB_API_TOKEN raises KeyError here.
login(token=os.environ["HUGGINGFACEHUB_API_TOKEN"])

# Single web-search tool instance handed to the agent.
search_tool = DuckDuckGoSearchTool()
16
 
 
# Phrase that introduces the boilerplate stripped from string results.
_MANAGED_AGENT_PREFIX = "Here is the final answer from your managed agent"
# Marker the system prompt asks the model to put in front of its answer.
_ANSWER_MARKER = "FINAL ANSWER:"

# Instruction prefix shared by every task; built once instead of per question.
_SYSTEM_PROMPT = (
    "You are a general AI assistant. I will ask you a question. "
    "Report your thoughts, and finish your answer with the following template: "
    "FINAL ANSWER: [YOUR FINAL ANSWER]. YOUR FINAL ANSWER should be a number OR as few words as possible OR a comma separated list of numbers and/or strings. "
    "If you are asked for a number, don't use comma to write your number neither use units such as $ or percent sign unless specified otherwise. "
    "If you are asked for a string, don't use articles, neither abbreviations (e.g. for cities), and write the digits in plain text unless specified otherwise. "
    "If you are asked for a comma separated list, apply the above rules depending of whether the element to be put in the list is a number or a string.\n\n"
)


def _extract_final_answer(agent_result) -> str:
    """Reduce whatever the agent returned to the bare answer string."""
    if isinstance(agent_result, dict) and "final_answer" in agent_result:
        return str(agent_result["final_answer"]).strip()
    if not isinstance(agent_result, str):
        return str(agent_result).strip()

    text = agent_result.strip()
    if _MANAGED_AGENT_PREFIX in text:
        # Cut at the colon that follows the boilerplate phrase, not at the
        # first colon of the whole text, so earlier colons cannot leave the
        # boilerplate in place.
        tail = text.partition(_MANAGED_AGENT_PREFIX)[2]
        text = tail.split(":", 1)[-1].strip()
    if _ANSWER_MARKER in text:
        text = text.rsplit(_ANSWER_MARKER, 1)[1].strip()
    return text


async def run_and_submit_all(profile: gr.OAuthProfile | None):
    """Answer every evaluation question with the agent and submit the results.

    Async generator used as the Gradio click handler. Every yielded value is
    a ``(status, table, log)`` tuple: ``status`` is the final status text (or
    ``None`` while tasks are still running), ``table`` is a DataFrame of the
    per-task results (or ``None``), and ``log`` is the accumulated progress
    log. Tasks whose agent run raises are shown in the table and log but are
    left out of the submitted answers.

    Args:
        profile: OAuth profile of the logged-in user, or ``None``. Without a
            profile the submission is sent under the username "unknown".
    """
    log_output = ""

    try:
        agent = CodeAgent(
            tools=[search_tool],
            # InferenceClientModel takes the checkpoint name as ``model_id``;
            # ``model=`` is not one of its named parameters.
            model=InferenceClientModel(model_id="mistralai/Magistral-Small-2506"),
            max_steps=5,
            verbosity_level=2,
        )
    except Exception as e:
        yield f"Error initializing agent: {e}", None, log_output
        return

    space_id = os.getenv("SPACE_ID")
    agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"

    questions_url = f"{DEFAULT_API_URL}/questions"
    try:
        response = requests.get(questions_url, timeout=15)
        response.raise_for_status()
        questions_data = response.json()
    except Exception as e:
        yield f"Error fetching questions: {e}", None, log_output
        return

    # Reject both an empty payload and one that is not a list of tasks.
    if not isinstance(questions_data, list) or not questions_data:
        yield "Fetched questions list is empty or invalid format.", None, log_output
        return

    results_log = []
    answers_payload = []
    # Inside a coroutine the running loop is the one to hand work to.
    loop = asyncio.get_running_loop()

    for item in questions_data:
        if not isinstance(item, dict):
            continue
        task_id = item.get("task_id")
        question_text = item.get("question")
        if not task_id or question_text is None:
            continue

        log_output += f"🔍 Solving Task ID: {task_id}...\n"
        yield None, None, log_output

        try:
            full_prompt = _SYSTEM_PROMPT + f"Question: {question_text.strip()}"
            # The agent call is blocking, so run it in the default executor.
            agent_result = await loop.run_in_executor(None, agent, full_prompt)
            final_answer = _extract_final_answer(agent_result)
        except Exception as e:
            print(f"Error running agent on task {task_id}: {e}")
            results_log.append({
                "Task ID": task_id,
                "Question": question_text,
                "Submitted Answer": f"AGENT ERROR: {e}",
            })
            log_output += f"⛔️ Error: {task_id} — {e}\n"
            yield None, None, log_output
            continue

        answers_payload.append({
            "task_id": task_id,
            "model_answer": final_answer,
        })
        results_log.append({
            "Task ID": task_id,
            "Question": question_text,
            "Submitted Answer": final_answer,
        })
        log_output += f"✅ Done: {task_id} — Answer: {final_answer[:60]}\n"
        yield None, None, log_output

    if not answers_payload:
        yield "Agent did not produce any answers to submit.", pd.DataFrame(results_log), log_output
        return

    username = profile.username if profile else "unknown"
    submit_url = f"{DEFAULT_API_URL}/submit"
    submission_data = {
        "username": username.strip(),
        "agent_code": agent_code,
        "answers": answers_payload,
    }
    results_df = pd.DataFrame(results_log)

    try:
        response = requests.post(submit_url, json=submission_data, timeout=60)
        response.raise_for_status()
        result_data = response.json()
    except Exception as e:
        yield f"Submission Failed: {e}", results_df, log_output
        return

    final_status = (
        f"Submission Successful!\n"
        f"User: {result_data.get('username')}\n"
        f"Overall Score: {result_data.get('score', 'N/A')}% "
        f"({result_data.get('correct_count', '?')}/{result_data.get('total_attempted', '?')} correct)\n"
        f"Message: {result_data.get('message', 'No message received.')}"
    )
    yield final_status, results_df, log_output
138
 
 
# Page layout: title, instructions, login button, run button and the three
# outputs that run_and_submit_all fills in.
with gr.Blocks() as demo:
    gr.Markdown("# Basic Agent Evaluation Runner")
    gr.Markdown("""
    **Instructions:**
    1. Clone this space and define your agent logic.
    2. Log in to your Hugging Face account.
    3. Click 'Run Evaluation & Submit All Answers'.
    ---
    **Note:**
    The run may take time. Async is now used to improve responsiveness.
    """)

    gr.LoginButton()

    run_button = gr.Button("Run Evaluation & Submit All Answers")
    status_output = gr.Textbox(
        label="Run Status / Submission Result",
        lines=5,
        interactive=False,
    )
    results_table = gr.DataFrame(label="Questions and Agent Answers", wrap=True)
    progress_log = gr.Textbox(label="Progress Log", lines=10, interactive=False)

    # Same order as the (status, table, log) tuples the handler yields.
    handler_outputs = [status_output, results_table, progress_log]
    run_button.click(fn=run_and_submit_all, outputs=handler_outputs)
159
 
 
160
  if __name__ == "__main__":
161
+ print("\n" + "-"*30 + " App Starting " + "-"*30)
162
+ space_host_startup = os.getenv("SPACE_HOST")
163
+ space_id_startup = os.getenv("SPACE_ID")
164
+
165
+ if space_host_startup:
166
+ print(f"βœ… SPACE_HOST: https://{space_host_startup}.hf.space")
167
+ if space_id_startup:
168
+ print(f"βœ… SPACE_ID: https://huggingface.co/spaces/{space_id_startup}")
169
+
170
+ print("Launching Gradio Interface...")
171
+ demo.launch(debug=True, share=False)