Spaces:
Sleeping
Sleeping
data handling
Browse files
app.py
CHANGED
@@ -143,22 +143,39 @@ def save_dataset_to_hub(df: pd.DataFrame, dataset_name: str) -> tuple[bool, str]
|
|
143 |
# Create a copy of the DataFrame to avoid modifying the original
|
144 |
df_to_save = df.copy()
|
145 |
|
146 |
-
|
147 |
-
|
148 |
-
|
149 |
-
|
150 |
-
|
151 |
-
|
152 |
-
|
153 |
if isinstance(x, dict):
|
154 |
-
return
|
155 |
if hasattr(x, 'dict'):
|
156 |
-
return x.dict()
|
157 |
if hasattr(x, '__dict__'):
|
158 |
-
return str(x.__dict__)
|
159 |
return str(x) if pd.notna(x) else None
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
160 |
|
161 |
-
|
|
|
|
|
|
|
162 |
|
163 |
# Convert to dataset
|
164 |
dataset = datasets.Dataset.from_pandas(df_to_save)
|
@@ -181,6 +198,9 @@ def save_dataset_to_hub(df: pd.DataFrame, dataset_name: str) -> tuple[bool, str]
|
|
181 |
except Exception as e:
|
182 |
error_msg = f"Error saving dataset: {str(e)}"
|
183 |
print(error_msg)
|
|
|
|
|
|
|
184 |
return False, error_msg
|
185 |
|
186 |
def run_and_submit_all( profile: gr.OAuthProfile | None, mock_submission: bool = False):
|
|
|
143 |
# Create a copy of the DataFrame to avoid modifying the original
|
144 |
df_to_save = df.copy()
|
145 |
|
146 |
+
def ensure_consistent_type(x, column_name):
    """Normalize a single cell to ``str`` or ``None``.

    Every non-missing value is rendered with ``str()`` so that a column
    never mixes dicts, lists, objects and strings.

    Args:
        x: The raw cell value.
        column_name: Name of the column the value belongs to. All columns
            are normalized the same way; the parameter is kept so existing
            call sites keep working.

    Returns:
        ``None`` for missing values (``None``, the literal string
        ``"None"``, or a scalar pandas treats as missing such as NaN),
        otherwise a string representation of the value.
    """
    # Guard the string comparison with isinstance: comparing an array-like
    # value to "None" is elementwise and its truth value is ambiguous.
    if x is None or (isinstance(x, str) and x == "None"):
        return None

    # Containers are stringified before any missing-value check, because
    # pd.isna()/pd.notna() on a list returns an array, not a bool.
    if isinstance(x, (list, tuple, dict)):
        return str(x)

    # Only scalars can be tested for missingness with a single bool result.
    if pd.api.types.is_scalar(x) and pd.isna(x):
        return None

    # Objects exposing a callable dict() are rendered via that method.
    to_dict = getattr(x, 'dict', None)
    if callable(to_dict):
        return str(to_dict())

    # Fall back to the instance attribute mapping for plain objects.
    if hasattr(x, '__dict__'):
        return str(x.__dict__)

    return str(x)
|
169 |
+
|
170 |
+
# Convert all columns to consistent types
|
171 |
+
for col in df_to_save.columns:
|
172 |
+
print(f"Converting column: {col}")
|
173 |
+
df_to_save[col] = df_to_save[col].apply(lambda x: ensure_consistent_type(x, col))
|
174 |
|
175 |
+
# Verify column type consistency
|
176 |
+
sample_values = df_to_save[col].dropna().head()
|
177 |
+
if not sample_values.empty:
|
178 |
+
print(f"Sample values for {col}: {sample_values.iloc[0]}")
|
179 |
|
180 |
# Convert to dataset
|
181 |
dataset = datasets.Dataset.from_pandas(df_to_save)
|
|
|
198 |
except Exception as e:
|
199 |
error_msg = f"Error saving dataset: {str(e)}"
|
200 |
print(error_msg)
|
201 |
+
# Print more detailed error information
|
202 |
+
if hasattr(e, '__cause__') and e.__cause__:
|
203 |
+
print(f"Caused by: {str(e.__cause__)}")
|
204 |
return False, error_msg
|
205 |
|
206 |
def run_and_submit_all( profile: gr.OAuthProfile | None, mock_submission: bool = False):
|