huytofu92 committed on
Commit
0390a00
·
1 Parent(s): 672bbd1

data saving

Browse files
Files changed (1) hide show
  1. app.py +23 -33
app.py CHANGED
@@ -140,43 +140,33 @@ def save_dataset_to_hub(df: pd.DataFrame, dataset_name: str) -> tuple[bool, str]
140
 
141
  print(f"Saving {len(df)} steps to {dataset_name}...")
142
 
143
- # Convert complex types to strings before creating dataset
144
- for col in df.columns:
145
- if df[col].dtype == 'object':
146
- def safe_convert(x):
147
- if x is None or x == "None":
148
- return None
149
- if isinstance(x, (list, tuple, dict)):
150
- return str(x)
151
- return str(x) if pd.notna(x) else None
152
-
153
- df[col] = df[col].apply(safe_convert)
 
 
 
 
 
 
 
 
154
 
155
  # Convert to dataset
156
- dataset = datasets.Dataset.from_pandas(df)
157
 
158
- # Add metadata
159
  dataset.info.description = "Agent steps data from evaluation run"
160
  dataset.info.features = {
161
- 'task_id': datasets.Value('string'),
162
- 'step_class': datasets.Value('string'),
163
- 'model_input_messages': datasets.Value('string'),
164
- 'tool_calls': datasets.Value('string'),
165
- 'start_time': datasets.Value('string'),
166
- 'end_time': datasets.Value('string'),
167
- 'step_number': datasets.Value('int64'),
168
- 'error': datasets.Value('string'),
169
- 'duration': datasets.Value('float64'),
170
- 'model_output_message': datasets.Value('string'),
171
- 'model_output': datasets.Value('string'),
172
- 'observations': datasets.Value('string'),
173
- 'observations_images': datasets.Value('string'),
174
- 'action_output': datasets.Value('string'),
175
- 'plan': datasets.Value('string'),
176
- 'task': datasets.Value('string'),
177
- 'task_images': datasets.Value('string'),
178
- 'system_prompt': datasets.Value('string'),
179
- 'final_answer': datasets.Value('string')
180
  }
181
 
182
  # Save to hub with token
@@ -186,7 +176,7 @@ def save_dataset_to_hub(df: pd.DataFrame, dataset_name: str) -> tuple[bool, str]
186
  token=os.getenv("HUGGINGFACE_API_KEY")
187
  )
188
 
189
- return True, f"Successfully saved {len(df)} steps to {dataset_name}"
190
 
191
  except Exception as e:
192
  error_msg = f"Error saving dataset: {str(e)}"
 
140
 
141
  print(f"Saving {len(df)} steps to {dataset_name}...")
142
 
143
+ # Create a copy of the DataFrame to avoid modifying the original
144
+ df_to_save = df.copy()
145
+
146
+ # Convert all columns to basic Python types
147
+ for col in df_to_save.columns:
148
+ def convert_to_basic_type(x):
149
+ if x is None or x == "None":
150
+ return None
151
+ if isinstance(x, (list, tuple)):
152
+ return [str(item) if item is not None else None for item in x]
153
+ if isinstance(x, dict):
154
+ return {str(k): str(v) if v is not None else None for k, v in x.items()}
155
+ if hasattr(x, 'dict'):
156
+ return x.dict()
157
+ if hasattr(x, '__dict__'):
158
+ return str(x.__dict__)
159
+ return str(x) if pd.notna(x) else None
160
+
161
+ df_to_save[col] = df_to_save[col].apply(convert_to_basic_type)
162
 
163
  # Convert to dataset
164
+ dataset = datasets.Dataset.from_pandas(df_to_save)
165
 
166
+ # Add metadata with explicit string types for all columns
167
  dataset.info.description = "Agent steps data from evaluation run"
168
  dataset.info.features = {
169
+ col: datasets.Value('string') for col in df_to_save.columns
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
170
  }
171
 
172
  # Save to hub with token
 
176
  token=os.getenv("HUGGINGFACE_API_KEY")
177
  )
178
 
179
+ return True, f"Successfully saved {len(df_to_save)} steps to {dataset_name}"
180
 
181
  except Exception as e:
182
  error_msg = f"Error saving dataset: {str(e)}"