Spaces:

girishwangikar
/

SmolAgents_DA

Running

App Files Community

girishwangikar committed on Jan 7

Commit

e5df187

verified ·

1 Parent(s): f65d1c7

Update app.py

Browse files

Files changed (1) hide show

app.py +58 -6

app.py CHANGED Viewed

@@ -35,7 +35,20 @@ class GroqLLM:
 @tool
 def analyze_basic_stats(data: pd.DataFrame) -> str:
-    """Calculate basic statistical measures for numerical columns in the dataset."""
     stats = {}
     numeric_cols = data.select_dtypes(include=[np.number]).columns
@@ -52,7 +65,20 @@ def analyze_basic_stats(data: pd.DataFrame) -> str:
 @tool
 def generate_correlation_matrix(data: pd.DataFrame) -> str:
-    """Generate a visual correlation matrix for numerical columns in the dataset."""
     numeric_data = data.select_dtypes(include=[np.number])
     plt.figure(figsize=(10, 8))
@@ -66,7 +92,20 @@ def generate_correlation_matrix(data: pd.DataFrame) -> str:
 @tool
 def analyze_categorical_columns(data: pd.DataFrame) -> str:
-    """Analyze categorical columns in the dataset for distribution and frequencies."""
     categorical_cols = data.select_dtypes(include=['object', 'category']).columns
     analysis = {}
@@ -81,7 +120,20 @@ def analyze_categorical_columns(data: pd.DataFrame) -> str:
 @tool
 def suggest_features(data: pd.DataFrame) -> str:
-    """Suggest potential feature engineering steps based on data characteristics."""
     suggestions = []
     numeric_cols = data.select_dtypes(include=[np.number]).columns
     categorical_cols = data.select_dtypes(include=['object', 'category']).columns
@@ -124,11 +176,11 @@ def main():
                     st.session_state['data'] = data
                     st.session_state['file_uploaded'] = True
-                    # Initialize agent with GroqLLM instead of GroqModel
                     st.session_state['agent'] = CodeAgent(
                         tools=[analyze_basic_stats, generate_correlation_matrix,
                                analyze_categorical_columns, suggest_features],
-                        model=GroqLLM(),  # Fixed: Using GroqLLM instead of GroqModel
                         additional_authorized_imports=["pandas", "numpy", "matplotlib", "seaborn"]
                     )

 @tool
 def analyze_basic_stats(data: pd.DataFrame) -> str:
+    """Calculate basic statistical measures for numerical columns in the dataset.
+    This function computes fundamental statistical metrics including mean, median,
+    standard deviation, skewness, and counts of missing values for all numerical
+    columns in the provided DataFrame.
+    Args:
+        data: A pandas DataFrame containing the dataset to analyze. The DataFrame
+            should contain at least one numerical column for meaningful analysis.
+    Returns:
+        str: A string containing formatted basic statistics for each numerical column,
+            including mean, median, standard deviation, skewness, and missing value counts.
+    """
     stats = {}
     numeric_cols = data.select_dtypes(include=[np.number]).columns
 @tool
 def generate_correlation_matrix(data: pd.DataFrame) -> str:
+    """Generate a visual correlation matrix for numerical columns in the dataset.
+    This function creates a heatmap visualization showing the correlations between
+    all numerical columns in the dataset. The correlation values are displayed
+    using a color-coded matrix for easy interpretation.
+    Args:
+        data: A pandas DataFrame containing the dataset to analyze. The DataFrame
+            should contain at least two numerical columns for correlation analysis.
+    Returns:
+        str: A base64 encoded string representing the correlation matrix plot image,
+            which can be displayed in a web interface or saved as an image file.
+    """
     numeric_data = data.select_dtypes(include=[np.number])
     plt.figure(figsize=(10, 8))
 @tool
 def analyze_categorical_columns(data: pd.DataFrame) -> str:
+    """Analyze categorical columns in the dataset for distribution and frequencies.
+    This function examines categorical columns to identify unique values, top categories,
+    and missing value counts, providing insights into the categorical data distribution.
+    Args:
+        data: A pandas DataFrame containing the dataset to analyze. The DataFrame
+            should contain at least one categorical column (object or category dtype)
+            for meaningful analysis.
+    Returns:
+        str: A string containing formatted analysis results for each categorical column,
+            including unique value counts, top categories, and missing value counts.
+    """
     categorical_cols = data.select_dtypes(include=['object', 'category']).columns
     analysis = {}
 @tool
 def suggest_features(data: pd.DataFrame) -> str:
+    """Suggest potential feature engineering steps based on data characteristics.
+    This function analyzes the dataset's structure and statistical properties to
+    recommend possible feature engineering steps that could improve model performance.
+    Args:
+        data: A pandas DataFrame containing the dataset to analyze. The DataFrame
+            can contain both numerical and categorical columns for feature
+            engineering suggestions.
+    Returns:
+        str: A string containing line-separated suggestions for feature engineering,
+            based on the characteristics of the input data.
+    """
     suggestions = []
     numeric_cols = data.select_dtypes(include=[np.number]).columns
     categorical_cols = data.select_dtypes(include=['object', 'category']).columns
                     st.session_state['data'] = data
                     st.session_state['file_uploaded'] = True
+                    # Initialize agent with GroqLLM
                     st.session_state['agent'] = CodeAgent(
                         tools=[analyze_basic_stats, generate_correlation_matrix,
                                analyze_categorical_columns, suggest_features],
+                        model=GroqLLM(),
                         additional_authorized_imports=["pandas", "numpy", "matplotlib", "seaborn"]
                     )