huytofu92 committed on
Commit
e9af7ec
·
1 Parent(s): 68c76ae
Files changed (1) hide show
  1. app.py +68 -9
app.py CHANGED
@@ -6,6 +6,7 @@ import datasets
6
  from mini_agents import master_agent
7
  from utils import get_full_file_path
8
  from smolagents.memory import ActionStep, PlanningStep, TaskStep, SystemPromptStep, FinalAnswerStep
 
9
 
10
  # (Keep Constants as is)
11
  # --- Constants ---
@@ -72,6 +73,60 @@ class BasicAgent:
72
  print(f"Agent returning fixed answer: {fixed_answer}")
73
  return fixed_answer, df_agent_steps
74
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
75
  def run_and_submit_all( profile: gr.OAuthProfile | None, mock_submission: bool = False):
76
  """
77
  Fetches all questions, runs the BasicAgent on them, submits all answers,
@@ -156,10 +211,14 @@ def run_and_submit_all( profile: gr.OAuthProfile | None, mock_submission: bool =
156
  print(status_update)
157
 
158
  # 5. Save steps data to huggingface dataset
159
- print("Commiting steps data to huggingface dataset...")
160
- dataset = datasets.Dataset.from_pandas(df_agent_steps)
161
- dataset.push_to_hub("huytofu92/agent_steps_huggingface_course_unit4")
162
- print("Agent steps data committed to huggingface dataset.")
 
 
 
 
163
  # 6. Submit
164
  print(f"Submitting {len(answers_payload)} answers to: {submit_url}")
165
  if mock_submission:
@@ -180,7 +239,7 @@ def run_and_submit_all( profile: gr.OAuthProfile | None, mock_submission: bool =
180
  )
181
  print("Submission successful.")
182
  results_df = pd.DataFrame(results_log, columns=["Task ID", "Question", "Submitted Answer"])
183
- print(results_df["Task ID", "Submitted Answer"].head(20))
184
  return final_status, results_df
185
  except requests.exceptions.HTTPError as e:
186
  error_detail = f"Server responded with status {e.response.status_code}."
@@ -192,25 +251,25 @@ def run_and_submit_all( profile: gr.OAuthProfile | None, mock_submission: bool =
192
  status_message = f"Submission Failed: {error_detail}"
193
  print(status_message)
194
  results_df = pd.DataFrame(results_log, columns=["Task ID", "Question", "Submitted Answer"])
195
- print(results_df["Task ID", "Submitted Answer"].head(20))
196
  return status_message, results_df
197
  except requests.exceptions.Timeout:
198
  status_message = "Submission Failed: The request timed out."
199
  print(status_message)
200
  results_df = pd.DataFrame(results_log, columns=["Task ID", "Question", "Submitted Answer"])
201
- print(results_df["Task ID", "Submitted Answer"].head(20))
202
  return status_message, results_df
203
  except requests.exceptions.RequestException as e:
204
  status_message = f"Submission Failed: Network error - {e}"
205
  print(status_message)
206
  results_df = pd.DataFrame(results_log, columns=["Task ID", "Question", "Submitted Answer"])
207
- print(results_df["Task ID", "Submitted Answer"].head(20))
208
  return status_message, results_df
209
  except Exception as e:
210
  status_message = f"An unexpected error occurred during submission: {e}"
211
  print(status_message)
212
  results_df = pd.DataFrame(results_log, columns=["Task ID", "Question", "Submitted Answer"])
213
- print(results_df["Task ID", "Submitted Answer"].head(20))
214
  return status_message, results_df
215
 
216
 
 
6
  from mini_agents import master_agent
7
  from utils import get_full_file_path
8
  from smolagents.memory import ActionStep, PlanningStep, TaskStep, SystemPromptStep, FinalAnswerStep
9
+ from typing import Optional
10
 
11
  # (Keep Constants as is)
12
  # --- Constants ---
 
73
  print(f"Agent returning fixed answer: {fixed_answer}")
74
  return fixed_answer, df_agent_steps
75
 
76
def check_required_env_vars() -> tuple[bool, Optional[str]]:
    """Report whether the environment variables this app needs are present.

    ``HF_TOKEN`` is mandatory. ``SPACE_ID`` is optional: when it is unset or
    empty a notice is printed, but the check still passes.

    Returns:
        ``(True, None)`` when no required variable is missing, otherwise
        ``(False, message)`` where ``message`` lists the missing names.
    """
    required_names = ("HF_TOKEN",)
    # An empty value counts as missing, same as an unset variable.
    absent = [name for name in required_names if not os.getenv(name)]

    # SPACE_ID only triggers a notice; it never fails the check.
    space_id = os.getenv("SPACE_ID")
    if not space_id:
        print("⚠️ SPACE_ID not set - this is normal when running locally")

    if not absent:
        return True, None
    return False, f"Missing required environment variables: {', '.join(absent)}"
91
+
92
def save_dataset_to_hub(df: pd.DataFrame, dataset_name: str) -> tuple[bool, str]:
    """Upload a DataFrame to the Hugging Face Hub as a private dataset.

    This is a best-effort operation: every failure is reported through the
    return value instead of an exception, so the caller can carry on with
    its remaining work when the upload does not succeed.

    Args:
        df: Rows to upload. ``None`` or a frame with no rows is rejected.
        dataset_name: Repository id handed to ``Dataset.push_to_hub``.

    Returns:
        ``(True, message)`` when the upload succeeded, ``(False, message)``
        when the environment is incomplete, the frame is empty, or the
        conversion/upload raised.
    """
    # Fail early, with a clear message, if the required token is not configured.
    env_ok, env_error = check_required_env_vars()
    if not env_ok:
        return False, f"Cannot save dataset: {env_error}"

    try:
        if df is None or len(df) == 0:
            return False, "Cannot save empty dataset"

        row_count = len(df)
        print(f"Saving {row_count} steps to {dataset_name}...")

        # The schema (features) is inferred from the DataFrame here.
        dataset = datasets.Dataset.from_pandas(df)

        # Only the free-text description is attached. The inferred features
        # are deliberately left untouched: replacing them with a plain dict
        # that names a subset of the columns would no longer describe the
        # underlying table and is not a ``datasets.Features`` object.
        dataset.info.description = "Agent steps data from evaluation run"

        dataset.push_to_hub(
            dataset_name,
            private=True,
            token=os.getenv("HF_TOKEN"),
        )

        return True, f"Successfully saved {row_count} steps to {dataset_name}"

    except Exception as e:
        # Deliberately broad: a failed upload must never abort the caller.
        error_msg = f"Error saving dataset: {e}"
        print(error_msg)
        return False, error_msg
129
+
130
  def run_and_submit_all( profile: gr.OAuthProfile | None, mock_submission: bool = False):
131
  """
132
  Fetches all questions, runs the BasicAgent on them, submits all answers,
 
211
  print(status_update)
212
 
213
  # 5. Save steps data to huggingface dataset
214
+ print("\nSaving agent steps to Hugging Face dataset...")
215
+ success, message = save_dataset_to_hub(df_agent_steps, "huytofu92/agent_steps_huggingface_course_unit4")
216
+ if success:
217
+ print(message)
218
+ else:
219
+ print(f"⚠️ {message}")
220
+ print("Continuing with submission despite dataset save failure...")
221
+
222
  # 6. Submit
223
  print(f"Submitting {len(answers_payload)} answers to: {submit_url}")
224
  if mock_submission:
 
239
  )
240
  print("Submission successful.")
241
  results_df = pd.DataFrame(results_log, columns=["Task ID", "Question", "Submitted Answer"])
242
+ print(results_df[["Task ID", "Submitted Answer"]].head(20))
243
  return final_status, results_df
244
  except requests.exceptions.HTTPError as e:
245
  error_detail = f"Server responded with status {e.response.status_code}."
 
251
  status_message = f"Submission Failed: {error_detail}"
252
  print(status_message)
253
  results_df = pd.DataFrame(results_log, columns=["Task ID", "Question", "Submitted Answer"])
254
+ print(results_df[["Task ID", "Submitted Answer"]].head(20))
255
  return status_message, results_df
256
  except requests.exceptions.Timeout:
257
  status_message = "Submission Failed: The request timed out."
258
  print(status_message)
259
  results_df = pd.DataFrame(results_log, columns=["Task ID", "Question", "Submitted Answer"])
260
+ print(results_df[["Task ID", "Submitted Answer"]].head(20))
261
  return status_message, results_df
262
  except requests.exceptions.RequestException as e:
263
  status_message = f"Submission Failed: Network error - {e}"
264
  print(status_message)
265
  results_df = pd.DataFrame(results_log, columns=["Task ID", "Question", "Submitted Answer"])
266
+ print(results_df[["Task ID", "Submitted Answer"]].head(20))
267
  return status_message, results_df
268
  except Exception as e:
269
  status_message = f"An unexpected error occurred during submission: {e}"
270
  print(status_message)
271
  results_df = pd.DataFrame(results_log, columns=["Task ID", "Question", "Submitted Answer"])
272
+ print(results_df[["Task ID", "Submitted Answer"]].head(20))
273
  return status_message, results_df
274
 
275