huytofu92 committed on
Commit
146fc91
·
1 Parent(s): c058184

Data saving againagain

Browse files
Files changed (1) hide show
  1. app.py +53 -23
app.py CHANGED
@@ -7,6 +7,7 @@ from mini_agents import MasterAgentWrapper
7
  from utils import get_full_file_path
8
  from smolagents.memory import ActionStep, PlanningStep, TaskStep, SystemPromptStep, FinalAnswerStep
9
  from typing import Optional
 
10
 
11
  # (Keep Constants as is)
12
  # --- Constants ---
@@ -143,39 +144,68 @@ def save_dataset_to_hub(df: pd.DataFrame, dataset_name: str) -> tuple[bool, str]
143
  # Create a copy of the DataFrame to avoid modifying the original
144
  df_to_save = df.copy()
145
 
 
 
 
 
 
 
 
 
 
 
146
  def ensure_consistent_type(x, column_name):
147
  """Ensure consistent type within a column"""
148
- if x is None or x == "None":
149
- return None
150
-
151
- # Special handling for model_output_message and similar columns
152
- if column_name in ['model_output_message', 'model_input_messages', 'tool_calls']:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
153
  if isinstance(x, dict):
154
- return str(x) # Convert dict to string
155
  if hasattr(x, 'dict'):
156
- return str(x.dict()) # Convert object with dict() to string
157
  if hasattr(x, '__dict__'):
158
- return str(x.__dict__) # Convert object with __dict__ to string
159
- return str(x) if pd.notna(x) else None
160
-
161
- # For other columns, convert to string
162
- if isinstance(x, (list, tuple, dict)):
163
  return str(x)
164
- if hasattr(x, 'dict'):
165
- return str(x.dict())
166
- if hasattr(x, '__dict__'):
167
- return str(x.__dict__)
168
- return str(x) if pd.notna(x) else None
169
 
170
  # Convert all columns to consistent types
171
  for col in df_to_save.columns:
172
  print(f"Converting column: {col}")
173
- df_to_save[col] = df_to_save[col].apply(lambda x: ensure_consistent_type(x, col))
174
-
175
- # Verify column type consistency
176
- sample_values = df_to_save[col].dropna().head()
177
- if not sample_values.empty:
178
- print(f"Sample values for {col}: {sample_values.iloc[0]}")
 
 
 
 
 
 
 
 
 
 
179
 
180
  # Convert to dataset
181
  dataset = datasets.Dataset.from_pandas(df_to_save)
 
7
  from utils import get_full_file_path
8
  from smolagents.memory import ActionStep, PlanningStep, TaskStep, SystemPromptStep, FinalAnswerStep
9
  from typing import Optional
10
+ import numpy as np
11
 
12
  # (Keep Constants as is)
13
  # --- Constants ---
 
144
  # Create a copy of the DataFrame to avoid modifying the original
145
  df_to_save = df.copy()
146
 
147
def is_none_or_nan(x):
    """Return True when ``x`` is a missing-value marker, False otherwise.

    The following count as missing:
      * ``None``
      * a Python ``float`` or NumPy floating scalar that is NaN
      * the literal strings ``"None"``, ``"nan"`` and ``"NaN"``

    Any other value (including lists, dicts and NumPy arrays) is reported
    as not missing. The result is always a plain ``bool``.
    """
    if x is None:
        return True
    if isinstance(x, (float, np.floating)):
        # bool() so callers never receive a numpy.bool_.
        return bool(np.isnan(x))
    # Compare against the sentinel strings only for real strings. Running
    # `x == "None"` on an ndarray is element-wise and its truth value raises
    # ValueError; on pandas.NA it raises TypeError.
    return isinstance(x, str) and x in ("None", "nan", "NaN")
156
+
157
  def ensure_consistent_type(x, column_name):
158
  """Ensure consistent type within a column"""
159
+ try:
160
+ if is_none_or_nan(x):
161
+ return None
162
+
163
+ # Special handling for model_input_messages and similar columns
164
+ if column_name in ['model_input_messages', 'model_output_message', 'tool_calls']:
165
+ if isinstance(x, (list, tuple, np.ndarray)):
166
+ # Convert each item in the array/list to string
167
+ return str([str(item) if not is_none_or_nan(item) else None for item in x])
168
+ if isinstance(x, dict):
169
+ return str(x)
170
+ if hasattr(x, 'dict'):
171
+ return str(x.dict())
172
+ if hasattr(x, '__dict__'):
173
+ return str(x.__dict__)
174
+ return str(x)
175
+
176
+ # For other columns, convert to string
177
+ if isinstance(x, (list, tuple, np.ndarray)):
178
+ return str(x.tolist() if hasattr(x, 'tolist') else list(x))
179
  if isinstance(x, dict):
180
+ return str(x)
181
  if hasattr(x, 'dict'):
182
+ return str(x.dict())
183
  if hasattr(x, '__dict__'):
184
+ return str(x.__dict__)
 
 
 
 
185
  return str(x)
186
+ except Exception as e:
187
+ print(f"Warning: Error converting value in column {column_name}: {str(e)}")
188
+ return str(x) if not is_none_or_nan(x) else None
 
 
189
 
190
  # Convert all columns to consistent types
191
  for col in df_to_save.columns:
192
  print(f"Converting column: {col}")
193
+ try:
194
+ # Handle numpy arrays and pandas series
195
+ if isinstance(df_to_save[col], (np.ndarray, pd.Series)):
196
+ # Convert None/NaN to None, everything else to string
197
+ df_to_save[col] = df_to_save[col].apply(lambda x: None if is_none_or_nan(x) else str(x))
198
+ else:
199
+ df_to_save[col] = df_to_save[col].apply(lambda x: ensure_consistent_type(x, col))
200
+
201
+ # Verify column type consistency
202
+ sample_values = df_to_save[col].dropna().head()
203
+ if not sample_values.empty:
204
+ print(f"Sample values for {col}: {sample_values.iloc[0]}")
205
+ except Exception as e:
206
+ print(f"Warning: Error processing column {col}: {str(e)}")
207
+ # If there's an error, try to convert the entire column to string
208
+ df_to_save[col] = df_to_save[col].apply(lambda x: None if is_none_or_nan(x) else str(x))
209
 
210
  # Convert to dataset
211
  dataset = datasets.Dataset.from_pandas(df_to_save)