Spaces:

mgbam
/

DataBiz

Sleeping

App Files Community

mgbam committed on Jan 28

Commit

f60d18c

verified ·

1 Parent(s): b1ccc30

Update app.py

Browse files

Files changed (1) hide show

app.py +33 -21

app.py CHANGED Viewed

@@ -131,7 +131,7 @@ class DataAnalysisAgent(CodeAgent):
 # ------------------------------
 @tool
-def analyze_basic_stats(data: pd.DataFrame) -> str:
     """
     Calculate and visualize basic statistical measures for numerical columns.
@@ -141,9 +141,11 @@ def analyze_basic_stats(data: pd.DataFrame) -> str:
     the mean, median, and standard deviation for each numerical feature.
     Args:
-        data (pd.DataFrame): A pandas DataFrame containing the dataset to analyze.
-                             The DataFrame should contain at least one numerical column
-                             for meaningful analysis.
     Returns:
         str: A markdown-formatted string containing the statistics and the generated plot.
@@ -184,7 +186,7 @@ def analyze_basic_stats(data: pd.DataFrame) -> str:
     return f"### Basic Statistics\n{stats_df.to_markdown()} \n\n![Basic Statistics](data:image/png;base64,{stats_plot})"
 @tool
-def generate_correlation_matrix(data: pd.DataFrame) -> str:
     """
     Generate an interactive correlation matrix using Plotly.
@@ -193,9 +195,11 @@ def generate_correlation_matrix(data: pd.DataFrame) -> str:
     and interact with the plot (zoom, pan).
     Args:
-        data (pd.DataFrame): A pandas DataFrame containing the dataset to analyze.
-                             The DataFrame should contain at least two numerical columns
-                             for correlation analysis.
     Returns:
         str: An HTML string representing the interactive correlation matrix plot.
@@ -220,7 +224,7 @@ def generate_correlation_matrix(data: pd.DataFrame) -> str:
     return correlation_html
 @tool
-def analyze_categorical_columns(data: pd.DataFrame) -> str:
     """
     Analyze categorical columns with visualizations.
@@ -229,9 +233,11 @@ def analyze_categorical_columns(data: pd.DataFrame) -> str:
     categorical feature.
     Args:
-        data (pd.DataFrame): A pandas DataFrame containing the dataset to analyze.
-                             The DataFrame should contain at least one categorical column
-                             for meaningful analysis.
     Returns:
         str: A markdown-formatted string containing analysis results and embedded plots.
@@ -275,7 +281,7 @@ def analyze_categorical_columns(data: pd.DataFrame) -> str:
     return plots + f"### Categorical Columns Analysis\n{pd.DataFrame(analysis).T.to_markdown()}"
 @tool
-def suggest_features(data: pd.DataFrame) -> str:
     """
     Suggest potential feature engineering steps based on data characteristics.
@@ -283,8 +289,10 @@ def suggest_features(data: pd.DataFrame) -> str:
     recommend possible feature engineering steps that could improve model performance.
     Args:
-        data (pd.DataFrame): A pandas DataFrame containing the dataset to analyze.
-                             The DataFrame can contain both numerical and categorical columns.
     Returns:
         str: A string containing suggestions for feature engineering based on
@@ -322,7 +330,7 @@ def suggest_features(data: pd.DataFrame) -> str:
     return "\n".join(suggestions)
 @tool
-def predictive_analysis(data: pd.DataFrame, target: str) -> str:
     """
     Perform predictive analytics by training a classification model.
@@ -330,9 +338,13 @@ def predictive_analysis(data: pd.DataFrame, target: str) -> str:
     and provides detailed metrics and visualizations such as the confusion matrix and ROC curve.
     Args:
-        data (pd.DataFrame): A pandas DataFrame containing the dataset to analyze.
-                             The DataFrame should contain the target variable for prediction.
-        target (str): The name of the target variable column in the dataset.
     Returns:
         str: A markdown-formatted string containing the classification report, confusion matrix,
@@ -341,8 +353,8 @@ def predictive_analysis(data: pd.DataFrame, target: str) -> str:
     if data is None:
         data = tool.agent.dataset
-    if target not in data.columns:
-        return f"Error: Target column `{target}` not found in the dataset."
     # Handle categorical target
     if data[target].dtype == 'object' or data[target].dtype.name == 'category':

 # ------------------------------
 @tool
+def analyze_basic_stats(data: Optional[pd.DataFrame] = None) -> str:
     """
     Calculate and visualize basic statistical measures for numerical columns.
     the mean, median, and standard deviation for each numerical feature.
     Args:
+        data (Optional[pd.DataFrame]):
+            A pandas DataFrame containing the dataset to analyze.
+            If None, the agent's stored dataset will be used.
+            The DataFrame should contain at least one numerical column
+            for meaningful analysis.
     Returns:
         str: A markdown-formatted string containing the statistics and the generated plot.
     return f"### Basic Statistics\n{stats_df.to_markdown()} \n\n![Basic Statistics](data:image/png;base64,{stats_plot})"
 @tool
+def generate_correlation_matrix(data: Optional[pd.DataFrame] = None) -> str:
     """
     Generate an interactive correlation matrix using Plotly.
     and interact with the plot (zoom, pan).
     Args:
+        data (Optional[pd.DataFrame]):
+            A pandas DataFrame containing the dataset to analyze.
+            If None, the agent's stored dataset will be used.
+            The DataFrame should contain at least two numerical columns
+            for correlation analysis.
     Returns:
         str: An HTML string representing the interactive correlation matrix plot.
     return correlation_html
 @tool
+def analyze_categorical_columns(data: Optional[pd.DataFrame] = None) -> str:
     """
     Analyze categorical columns with visualizations.
     categorical feature.
     Args:
+        data (Optional[pd.DataFrame]):
+            A pandas DataFrame containing the dataset to analyze.
+            If None, the agent's stored dataset will be used.
+            The DataFrame should contain at least one categorical column
+            for meaningful analysis.
     Returns:
         str: A markdown-formatted string containing analysis results and embedded plots.
     return plots + f"### Categorical Columns Analysis\n{pd.DataFrame(analysis).T.to_markdown()}"
 @tool
+def suggest_features(data: Optional[pd.DataFrame] = None) -> str:
     """
     Suggest potential feature engineering steps based on data characteristics.
     recommend possible feature engineering steps that could improve model performance.
     Args:
+        data (Optional[pd.DataFrame]):
+            A pandas DataFrame containing the dataset to analyze.
+            If None, the agent's stored dataset will be used.
+            The DataFrame can contain both numerical and categorical columns.
     Returns:
         str: A string containing suggestions for feature engineering based on
     return "\n".join(suggestions)
 @tool
+def predictive_analysis(data: Optional[pd.DataFrame] = None, target: Optional[str] = None) -> str:
     """
     Perform predictive analytics by training a classification model.
     and provides detailed metrics and visualizations such as the confusion matrix and ROC curve.
     Args:
+        data (Optional[pd.DataFrame]):
+            A pandas DataFrame containing the dataset to analyze.
+            If None, the agent's stored dataset will be used.
+            The DataFrame should contain the target variable for prediction.
+        target (Optional[str]):
+            The name of the target variable column in the dataset.
+            If None, the agent must provide the target variable through the prompt.
     Returns:
         str: A markdown-formatted string containing the classification report, confusion matrix,
     if data is None:
         data = tool.agent.dataset
+    if target is None or target not in data.columns:
+        return f"Error: Target column not specified or `{target}` not found in the dataset."
     # Handle categorical target
     if data[target].dtype == 'object' or data[target].dtype.name == 'category':