Sonu313131 committed on
Commit
03a5300
·
verified ·
1 Parent(s): fdfeb12

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +45 -72
app.py CHANGED
@@ -4,7 +4,6 @@ import requests
4
  import pandas as pd
5
  import asyncio
6
  import json
7
- import concurrent.futures
8
  from huggingface_hub import login
9
  from smolagents import CodeAgent, InferenceClientModel, DuckDuckGoSearchTool
10
 
@@ -19,8 +18,7 @@ login(token=os.environ["HUGGINGFACEHUB_API_TOKEN"])
19
  # --- Define Tools ---
20
  search_tool = DuckDuckGoSearchTool()
21
 
22
-
23
- # --- Main Function ---
24
  async def run_and_submit_all(profile: gr.OAuthProfile | None):
25
  # Initialize Agent
26
  try:
@@ -31,9 +29,8 @@ async def run_and_submit_all(profile: gr.OAuthProfile | None):
31
  verbosity_level=2
32
  )
33
  except Exception as e:
34
- return f"Error initializing agent: {e}", None
35
 
36
- # Get Space ID for agent_code link
37
  space_id = os.getenv("SPACE_ID", "unknown")
38
  agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
39
 
@@ -41,18 +38,16 @@ async def run_and_submit_all(profile: gr.OAuthProfile | None):
41
  try:
42
  response = requests.get(QUESTIONS_URL, timeout=15)
43
  response.raise_for_status()
44
- questions_data = response.json()
45
- if not questions_data:
46
- return "No questions received.", None
47
  except Exception as e:
48
- return f"Error fetching questions: {e}", None
49
 
50
- # Prepare results
51
- answers_payload = []
52
- results_log = []
53
- loop = asyncio.get_event_loop()
54
 
55
- for item in questions_data:
56
  task_id = item.get("task_id")
57
  question = item.get("question")
58
  if not task_id or not question:
@@ -61,100 +56,78 @@ async def run_and_submit_all(profile: gr.OAuthProfile | None):
61
  system_prompt = (
62
  "You are a general AI assistant. I will ask you a question. "
63
  "Report your thoughts, and finish your answer with the following template: "
64
- "FINAL ANSWER: [YOUR FINAL ANSWER]. YOUR FINAL ANSWER should be a number OR as few words as possible OR a comma separated list of numbers and/or strings. "
65
- "If you are asked for a number, don't use comma to write your number neither use units such as $ or percent sign unless specified otherwise. "
66
- "If you are asked for a string, don't use articles, neither abbreviations (e.g. for cities), and write the digits in plain text unless specified otherwise. "
67
- "If you are asked for a comma separated list, apply the above rules depending of whether the element to be put in the list is a number or a string.\n\n"
68
  )
69
- prompt = system_prompt + f"Question: {question.strip()}"
70
 
71
- # Run agent with timeout
72
  try:
73
- with concurrent.futures.ThreadPoolExecutor() as executor:
74
- future = executor.submit(agent, prompt)
75
- agent_result = await loop.run_in_executor(None, future.result, 60) # timeout=60s
76
-
77
- # Clean model output
78
- if isinstance(agent_result, dict) and "final_answer" in agent_result:
79
- final_answer = str(agent_result["final_answer"]).strip()
80
- elif isinstance(agent_result, str):
81
- response_text = agent_result.strip()
82
-
83
- # Remove known boilerplate
84
- if "Here is the final answer from your managed agent" in response_text:
85
- response_text = response_text.split(":", 1)[-1].strip()
86
-
87
- # Extract final answer
88
- if "FINAL ANSWER:" in response_text:
89
- _, final_answer = response_text.rsplit("FINAL ANSWER:", 1)
90
- final_answer = final_answer.strip()
91
  else:
92
- final_answer = response_text
93
  else:
94
- final_answer = str(agent_result).strip()
95
 
96
  except Exception as e:
97
  print(f"[ERROR] Task {task_id} failed: {e}")
98
  final_answer = f"AGENT ERROR: {e}"
99
 
100
- answers_payload.append({"task_id": task_id, "model_answer": final_answer})
101
- results_log.append({"Task ID": task_id, "Question": question, "Submitted Answer": final_answer})
102
 
103
- # Clean invalid entries
104
- valid_answers = [a for a in answers_payload if isinstance(a["task_id"], str) and isinstance(a["model_answer"], str)]
105
 
106
  if not valid_answers:
107
- return "Agent produced no valid answers.", pd.DataFrame(results_log)
108
 
109
- # Prepare submission
110
- username = profile.username if profile else "unknown"
111
- submission_data = {
112
- "username": username.strip(),
113
  "agent_code": agent_code,
114
  "answers": valid_answers
115
  }
116
 
117
- print("[DEBUG] Submission Payload:\n", json.dumps(submission_data, indent=2))
118
 
119
  try:
120
- response = requests.post(SUBMIT_URL, json=submission_data, timeout=60)
121
- response.raise_for_status()
122
- result_data = response.json()
123
 
124
- final_status = (
125
  f"βœ… Submission Successful\n"
126
  f"User: {result_data.get('username')}\n"
127
  f"Score: {result_data.get('score', 'N/A')}% "
128
  f"({result_data.get('correct_count')}/{result_data.get('total_attempted')})\n"
129
  f"Message: {result_data.get('message', 'No message.')}"
130
  )
131
- return final_status, pd.DataFrame(results_log)
132
 
133
  except Exception as e:
134
- return f"Submission Failed: {e}", pd.DataFrame(results_log)
135
-
136
 
137
  # --- Gradio UI ---
138
  with gr.Blocks() as demo:
139
- gr.Markdown("# Agent Evaluation Interface")
140
  gr.Markdown("""
141
- **Instructions:**
142
- 1. Clone and customize the agent logic.
143
- 2. Log in to Hugging Face.
144
- 3. Click "Run Evaluation" to test and submit your answers.
145
  """)
146
 
147
  gr.LoginButton()
148
- run_button = gr.Button("Run Evaluation & Submit All Answers")
149
- status_output = gr.Textbox(label="Status", lines=5, interactive=False)
150
- results_table = gr.DataFrame(label="Agent Answers", wrap=True)
151
 
152
- run_button.click(fn=run_and_submit_all, outputs=[status_output, results_table])
153
- #hi
154
 
155
- # --- App Launch ---
156
  if __name__ == "__main__":
157
- print("\n--- Launching Gradio Space ---")
158
- print(f"βœ… SPACE_HOST: {os.getenv('SPACE_HOST')}")
159
- print(f"βœ… SPACE_ID: {os.getenv('SPACE_ID')}")
160
- demo.launch(debug=True, share=False)
 
4
  import pandas as pd
5
  import asyncio
6
  import json
 
7
  from huggingface_hub import login
8
  from smolagents import CodeAgent, InferenceClientModel, DuckDuckGoSearchTool
9
 
 
18
  # --- Define Tools ---
19
  search_tool = DuckDuckGoSearchTool()
20
 
21
+ # --- Main Async Function ---
 
22
  async def run_and_submit_all(profile: gr.OAuthProfile | None):
23
  # Initialize Agent
24
  try:
 
29
  verbosity_level=2
30
  )
31
  except Exception as e:
32
+ return f"❌ Agent Initialization Error: {e}", None
33
 
 
34
  space_id = os.getenv("SPACE_ID", "unknown")
35
  agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
36
 
 
38
  try:
39
  response = requests.get(QUESTIONS_URL, timeout=15)
40
  response.raise_for_status()
41
+ questions = response.json()
42
+ if not questions:
43
+ return "⚠️ No questions received.", None
44
  except Exception as e:
45
+ return f"❌ Failed to fetch questions: {e}", None
46
 
47
+ answers = []
48
+ logs = []
 
 
49
 
50
+ for item in questions:
51
  task_id = item.get("task_id")
52
  question = item.get("question")
53
  if not task_id or not question:
 
56
  system_prompt = (
57
  "You are a general AI assistant. I will ask you a question. "
58
  "Report your thoughts, and finish your answer with the following template: "
59
+ "FINAL ANSWER: [YOUR FINAL ANSWER]. YOUR FINAL ANSWER should be a number OR as few words as possible OR a comma separated list of numbers and/or strings.\n\n"
 
 
 
60
  )
61
+ full_prompt = system_prompt + f"Question: {question.strip()}"
62
 
 
63
  try:
64
+ loop = asyncio.get_running_loop()
65
+ result = await loop.run_in_executor(None, lambda: agent(full_prompt))
66
+
67
+ if isinstance(result, dict) and "final_answer" in result:
68
+ final_answer = str(result["final_answer"]).strip()
69
+ elif isinstance(result, str):
70
+ if "FINAL ANSWER:" in result:
71
+ final_answer = result.split("FINAL ANSWER:")[-1].strip()
 
 
 
 
 
 
 
 
 
 
72
  else:
73
+ final_answer = result.strip()
74
  else:
75
+ final_answer = str(result).strip()
76
 
77
  except Exception as e:
78
  print(f"[ERROR] Task {task_id} failed: {e}")
79
  final_answer = f"AGENT ERROR: {e}"
80
 
81
+ answers.append({"task_id": task_id, "model_answer": final_answer})
82
+ logs.append({"Task ID": task_id, "Question": question, "Submitted Answer": final_answer})
83
 
84
+ valid_answers = [a for a in answers if isinstance(a["task_id"], str) and isinstance(a["model_answer"], str)]
 
85
 
86
  if not valid_answers:
87
+ return "❌ Agent produced no valid answers.", pd.DataFrame(logs)
88
 
89
+ submission = {
90
+ "username": profile.username if profile else "unknown",
 
 
91
  "agent_code": agent_code,
92
  "answers": valid_answers
93
  }
94
 
95
+ print("[DEBUG] Submitting:\n", json.dumps(submission, indent=2))
96
 
97
  try:
98
+ resp = requests.post(SUBMIT_URL, json=submission, timeout=60)
99
+ resp.raise_for_status()
100
+ result_data = resp.json()
101
 
102
+ summary = (
103
  f"βœ… Submission Successful\n"
104
  f"User: {result_data.get('username')}\n"
105
  f"Score: {result_data.get('score', 'N/A')}% "
106
  f"({result_data.get('correct_count')}/{result_data.get('total_attempted')})\n"
107
  f"Message: {result_data.get('message', 'No message.')}"
108
  )
109
+ return summary, pd.DataFrame(logs)
110
 
111
  except Exception as e:
112
+ return f"❌ Submission failed: {e}", pd.DataFrame(logs)
 
113
 
114
  # --- Gradio UI ---
115
  with gr.Blocks() as demo:
116
+ gr.Markdown("# 🧠 GAIA Agent Evaluation Interface")
117
  gr.Markdown("""
118
+ - Log in with your Hugging Face account.
119
+ - Click the button below to run the agent and submit the answers.
120
+ - Wait for the final score to appear.
 
121
  """)
122
 
123
  gr.LoginButton()
124
+ run_button = gr.Button("πŸš€ Run Evaluation & Submit")
125
+ status = gr.Textbox(label="Status", lines=6)
126
+ table = gr.DataFrame(label="Answer Log")
127
 
128
+ run_button.click(fn=run_and_submit_all, outputs=[status, table])
 
129
 
130
+ # --- Launch ---
131
  if __name__ == "__main__":
132
+ print("Launching Agent Space...")
133
+ demo.launch(debug=True)