Final_Assignment_Template

Sleeping

App Files Files Community

huytofu92 committed on May 22

Commit

c058184

1 Parent(s): 0390a00

data handling

Browse files

Files changed (1) hide show

app.py +31 -11

app.py CHANGED Viewed

@@ -143,22 +143,39 @@ def save_dataset_to_hub(df: pd.DataFrame, dataset_name: str) -> tuple[bool, str]
         # Create a copy of the DataFrame to avoid modifying the original
         df_to_save = df.copy()
-        # Convert all columns to basic Python types
-        for col in df_to_save.columns:
-            def convert_to_basic_type(x):
-                if x is None or x == "None":
-                    return None
-                if isinstance(x, (list, tuple)):
-                    return [str(item) if item is not None else None for item in x]
                 if isinstance(x, dict):
-                    return {str(k): str(v) if v is not None else None for k, v in x.items()}
                 if hasattr(x, 'dict'):
-                    return x.dict()
                 if hasattr(x, '__dict__'):
-                    return str(x.__dict__)
                 return str(x) if pd.notna(x) else None
-            df_to_save[col] = df_to_save[col].apply(convert_to_basic_type)
         # Convert to dataset
         dataset = datasets.Dataset.from_pandas(df_to_save)
@@ -181,6 +198,9 @@ def save_dataset_to_hub(df: pd.DataFrame, dataset_name: str) -> tuple[bool, str]
     except Exception as e:
         error_msg = f"Error saving dataset: {str(e)}"
         print(error_msg)
         return False, error_msg
 def run_and_submit_all( profile: gr.OAuthProfile | None, mock_submission: bool = False):

         # Create a copy of the DataFrame to avoid modifying the original
         df_to_save = df.copy()
+        def ensure_consistent_type(x, column_name):
+            """Ensure consistent type within a column"""
+            if x is None or x == "None":
+                return None
+            # Special handling for model_output_message and similar columns
+            if column_name in ['model_output_message', 'model_input_messages', 'tool_calls']:
                 if isinstance(x, dict):
+                    return str(x)  # Convert dict to string
                 if hasattr(x, 'dict'):
+                    return str(x.dict())  # Convert object with dict() to string
                 if hasattr(x, '__dict__'):
+                    return str(x.__dict__)  # Convert object with __dict__ to string
                 return str(x) if pd.notna(x) else None
+            # For other columns, convert to string
+            if isinstance(x, (list, tuple, dict)):
+                return str(x)
+            if hasattr(x, 'dict'):
+                return str(x.dict())
+            if hasattr(x, '__dict__'):
+                return str(x.__dict__)
+            return str(x) if pd.notna(x) else None
+        # Convert all columns to consistent types
+        for col in df_to_save.columns:
+            print(f"Converting column: {col}")
+            df_to_save[col] = df_to_save[col].apply(lambda x: ensure_consistent_type(x, col))
+            # Verify column type consistency
+            sample_values = df_to_save[col].dropna().head()
+            if not sample_values.empty:
+                print(f"Sample values for {col}: {sample_values.iloc[0]}")
         # Convert to dataset
         dataset = datasets.Dataset.from_pandas(df_to_save)
     except Exception as e:
         error_msg = f"Error saving dataset: {str(e)}"
         print(error_msg)
+        # Print more detailed error information
+        if hasattr(e, '__cause__') and e.__cause__:
+            print(f"Caused by: {str(e.__cause__)}")
         return False, error_msg
 def run_and_submit_all( profile: gr.OAuthProfile | None, mock_submission: bool = False):