huytofu92 committed on
Commit
8ed7d27
·
1 Parent(s): 4c52ca4

Data saving

Browse files
Files changed (1) hide show
  1. app.py +43 -3
app.py CHANGED
@@ -56,6 +56,18 @@ class BasicAgent:
56
  all_steps = self.agent.master_agent.memory.steps
57
  new_rows = [] # List to store new rows
58
 
 
 
 
 
 
 
 
 
 
 
 
 
59
  for step in all_steps:
60
  if isinstance(step, ActionStep):
61
  step_class = "ActionStep"
@@ -72,13 +84,20 @@ class BasicAgent:
72
 
73
  step_dict = step.dict()
74
  # Create a new row with default None values
75
- new_row = {col: None for col in df_agent_steps.columns}
76
  # Update with actual values
77
  new_row['task_id'] = task_id
78
  new_row['step_class'] = step_class
 
 
79
  for key, value in step_dict.items():
80
  if key in df_agent_steps.columns:
81
- new_row[key] = value
 
 
 
 
 
82
  new_rows.append(new_row)
83
 
84
  # Append all new rows at once
@@ -121,6 +140,11 @@ def save_dataset_to_hub(df: pd.DataFrame, dataset_name: str) -> tuple[bool, str]
121
 
122
  print(f"Saving {len(df)} steps to {dataset_name}...")
123
 
 
 
 
 
 
124
  # Convert to dataset
125
  dataset = datasets.Dataset.from_pandas(df)
126
 
@@ -129,7 +153,23 @@ def save_dataset_to_hub(df: pd.DataFrame, dataset_name: str) -> tuple[bool, str]
129
  dataset.info.features = {
130
  'task_id': datasets.Value('string'),
131
  'step_class': datasets.Value('string'),
132
- # Add other feature definitions as needed
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
133
  }
134
 
135
  # Save to hub with token
 
56
  all_steps = self.agent.master_agent.memory.steps
57
  new_rows = [] # List to store new rows
58
 
59
+ def serialize_value(value):
60
+ """Convert complex objects to serializable format"""
61
+ if hasattr(value, 'dict'):
62
+ return value.dict()
63
+ elif hasattr(value, '__dict__'):
64
+ return str(value.__dict__)
65
+ elif isinstance(value, (list, tuple)):
66
+ return [serialize_value(item) for item in value]
67
+ elif isinstance(value, dict):
68
+ return {k: serialize_value(v) for k, v in value.items()}
69
+ return value
70
+
71
  for step in all_steps:
72
  if isinstance(step, ActionStep):
73
  step_class = "ActionStep"
 
84
 
85
  step_dict = step.dict()
86
  # Create a new row with default None values
87
+ new_row = {col: "None" for col in df_agent_steps.columns}
88
  # Update with actual values
89
  new_row['task_id'] = task_id
90
  new_row['step_class'] = step_class
91
+
92
+ # Serialize complex objects before adding to DataFrame
93
  for key, value in step_dict.items():
94
  if key in df_agent_steps.columns:
95
+ try:
96
+ new_row[key] = serialize_value(value)
97
+ except Exception as e:
98
+ print(f"Warning: Could not serialize {key}, using string representation: {e}")
99
+ new_row[key] = str(value)
100
+
101
  new_rows.append(new_row)
102
 
103
  # Append all new rows at once
 
140
 
141
  print(f"Saving {len(df)} steps to {dataset_name}...")
142
 
143
+ # Convert complex types to strings before creating dataset
144
+ for col in df.columns:
145
+ if df[col].dtype == 'object':
146
+ df[col] = df[col].apply(lambda x: str(x) if pd.notnull(x) else None)
147
+
148
  # Convert to dataset
149
  dataset = datasets.Dataset.from_pandas(df)
150
 
 
153
  dataset.info.features = {
154
  'task_id': datasets.Value('string'),
155
  'step_class': datasets.Value('string'),
156
+ 'model_input_messages': datasets.Value('string'),
157
+ 'tool_calls': datasets.Value('string'),
158
+ 'start_time': datasets.Value('string'),
159
+ 'end_time': datasets.Value('string'),
160
+ 'step_number': datasets.Value('int64'),
161
+ 'error': datasets.Value('string'),
162
+ 'duration': datasets.Value('float64'),
163
+ 'model_output_message': datasets.Value('string'),
164
+ 'model_output': datasets.Value('string'),
165
+ 'observations': datasets.Value('string'),
166
+ 'observations_images': datasets.Value('string'),
167
+ 'action_output': datasets.Value('string'),
168
+ 'plan': datasets.Value('string'),
169
+ 'task': datasets.Value('string'),
170
+ 'task_images': datasets.Value('string'),
171
+ 'system_prompt': datasets.Value('string'),
172
+ 'final_answer': datasets.Value('string')
173
  }
174
 
175
  # Save to hub with token