huytofu92 committed on
Commit
c058184
·
1 Parent(s): 0390a00

data handling

Browse files
Files changed (1) hide show
  1. app.py +31 -11
app.py CHANGED
@@ -143,22 +143,39 @@ def save_dataset_to_hub(df: pd.DataFrame, dataset_name: str) -> tuple[bool, str]
143
  # Create a copy of the DataFrame to avoid modifying the original
144
  df_to_save = df.copy()
145
 
146
- # Convert all columns to basic Python types
147
- for col in df_to_save.columns:
148
- def convert_to_basic_type(x):
149
- if x is None or x == "None":
150
- return None
151
- if isinstance(x, (list, tuple)):
152
- return [str(item) if item is not None else None for item in x]
153
  if isinstance(x, dict):
154
- return {str(k): str(v) if v is not None else None for k, v in x.items()}
155
  if hasattr(x, 'dict'):
156
- return x.dict()
157
  if hasattr(x, '__dict__'):
158
- return str(x.__dict__)
159
  return str(x) if pd.notna(x) else None
 
 
 
 
 
 
 
 
 
 
 
 
 
 
160
 
161
- df_to_save[col] = df_to_save[col].apply(convert_to_basic_type)
 
 
 
162
 
163
  # Convert to dataset
164
  dataset = datasets.Dataset.from_pandas(df_to_save)
@@ -181,6 +198,9 @@ def save_dataset_to_hub(df: pd.DataFrame, dataset_name: str) -> tuple[bool, str]
181
  except Exception as e:
182
  error_msg = f"Error saving dataset: {str(e)}"
183
  print(error_msg)
 
 
 
184
  return False, error_msg
185
 
186
  def run_and_submit_all( profile: gr.OAuthProfile | None, mock_submission: bool = False):
 
143
  # Create a copy of the DataFrame to avoid modifying the original
144
  df_to_save = df.copy()
145
 
146
+ def ensure_consistent_type(x, column_name):
147
+ """Ensure consistent type within a column"""
148
+ if x is None or x == "None":
149
+ return None
150
+
151
+ # Special handling for model_output_message and similar columns
152
+ if column_name in ['model_output_message', 'model_input_messages', 'tool_calls']:
153
  if isinstance(x, dict):
154
+ return str(x) # Convert dict to string
155
  if hasattr(x, 'dict'):
156
+ return str(x.dict()) # Convert object with dict() to string
157
  if hasattr(x, '__dict__'):
158
+ return str(x.__dict__) # Convert object with __dict__ to string
159
  return str(x) if pd.notna(x) else None
160
+
161
+ # For other columns, convert to string
162
+ if isinstance(x, (list, tuple, dict)):
163
+ return str(x)
164
+ if hasattr(x, 'dict'):
165
+ return str(x.dict())
166
+ if hasattr(x, '__dict__'):
167
+ return str(x.__dict__)
168
+ return str(x) if pd.notna(x) else None
169
+
170
+ # Convert all columns to consistent types
171
+ for col in df_to_save.columns:
172
+ print(f"Converting column: {col}")
173
+ df_to_save[col] = df_to_save[col].apply(lambda x: ensure_consistent_type(x, col))
174
 
175
+ # Verify column type consistency
176
+ sample_values = df_to_save[col].dropna().head()
177
+ if not sample_values.empty:
178
+ print(f"Sample values for {col}: {sample_values.iloc[0]}")
179
 
180
  # Convert to dataset
181
  dataset = datasets.Dataset.from_pandas(df_to_save)
 
198
  except Exception as e:
199
  error_msg = f"Error saving dataset: {str(e)}"
200
  print(error_msg)
201
+ # Print more detailed error information
202
+ if hasattr(e, '__cause__') and e.__cause__:
203
+ print(f"Caused by: {str(e.__cause__)}")
204
  return False, error_msg
205
 
206
  def run_and_submit_all( profile: gr.OAuthProfile | None, mock_submission: bool = False):