Spaces:
Sleeping
Sleeping
data saving
Browse files
app.py
CHANGED
@@ -140,43 +140,33 @@ def save_dataset_to_hub(df: pd.DataFrame, dataset_name: str) -> tuple[bool, str]
|
|
140 |
|
141 |
print(f"Saving {len(df)} steps to {dataset_name}...")
|
142 |
|
143 |
-
#
|
144 |
-
|
145 |
-
|
146 |
-
|
147 |
-
|
148 |
-
|
149 |
-
|
150 |
-
|
151 |
-
|
152 |
-
|
153 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
154 |
|
155 |
# Convert to dataset
|
156 |
-
dataset = datasets.Dataset.from_pandas(
|
157 |
|
158 |
-
# Add metadata
|
159 |
dataset.info.description = "Agent steps data from evaluation run"
|
160 |
dataset.info.features = {
|
161 |
-
|
162 |
-
'step_class': datasets.Value('string'),
|
163 |
-
'model_input_messages': datasets.Value('string'),
|
164 |
-
'tool_calls': datasets.Value('string'),
|
165 |
-
'start_time': datasets.Value('string'),
|
166 |
-
'end_time': datasets.Value('string'),
|
167 |
-
'step_number': datasets.Value('int64'),
|
168 |
-
'error': datasets.Value('string'),
|
169 |
-
'duration': datasets.Value('float64'),
|
170 |
-
'model_output_message': datasets.Value('string'),
|
171 |
-
'model_output': datasets.Value('string'),
|
172 |
-
'observations': datasets.Value('string'),
|
173 |
-
'observations_images': datasets.Value('string'),
|
174 |
-
'action_output': datasets.Value('string'),
|
175 |
-
'plan': datasets.Value('string'),
|
176 |
-
'task': datasets.Value('string'),
|
177 |
-
'task_images': datasets.Value('string'),
|
178 |
-
'system_prompt': datasets.Value('string'),
|
179 |
-
'final_answer': datasets.Value('string')
|
180 |
}
|
181 |
|
182 |
# Save to hub with token
|
@@ -186,7 +176,7 @@ def save_dataset_to_hub(df: pd.DataFrame, dataset_name: str) -> tuple[bool, str]
|
|
186 |
token=os.getenv("HUGGINGFACE_API_KEY")
|
187 |
)
|
188 |
|
189 |
-
return True, f"Successfully saved {len(
|
190 |
|
191 |
except Exception as e:
|
192 |
error_msg = f"Error saving dataset: {str(e)}"
|
|
|
140 |
|
141 |
print(f"Saving {len(df)} steps to {dataset_name}...")
|
142 |
|
143 |
+
# Create a copy of the DataFrame to avoid modifying the original
|
144 |
+
df_to_save = df.copy()
|
145 |
+
|
146 |
+
# Convert all columns to basic Python types
|
147 |
+
for col in df_to_save.columns:
|
148 |
+
def convert_to_basic_type(x):
|
149 |
+
if x is None or x == "None":
|
150 |
+
return None
|
151 |
+
if isinstance(x, (list, tuple)):
|
152 |
+
return [str(item) if item is not None else None for item in x]
|
153 |
+
if isinstance(x, dict):
|
154 |
+
return {str(k): str(v) if v is not None else None for k, v in x.items()}
|
155 |
+
if hasattr(x, 'dict'):
|
156 |
+
return x.dict()
|
157 |
+
if hasattr(x, '__dict__'):
|
158 |
+
return str(x.__dict__)
|
159 |
+
return str(x) if pd.notna(x) else None
|
160 |
+
|
161 |
+
df_to_save[col] = df_to_save[col].apply(convert_to_basic_type)
|
162 |
|
163 |
# Convert to dataset
|
164 |
+
dataset = datasets.Dataset.from_pandas(df_to_save)
|
165 |
|
166 |
+
# Add metadata with explicit string types for all columns
|
167 |
dataset.info.description = "Agent steps data from evaluation run"
|
168 |
dataset.info.features = {
|
169 |
+
col: datasets.Value('string') for col in df_to_save.columns
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
170 |
}
|
171 |
|
172 |
# Save to hub with token
|
|
|
176 |
token=os.getenv("HUGGINGFACE_API_KEY")
|
177 |
)
|
178 |
|
179 |
+
return True, f"Successfully saved {len(df_to_save)} steps to {dataset_name}"
|
180 |
|
181 |
except Exception as e:
|
182 |
error_msg = f"Error saving dataset: {str(e)}"
|