Spaces:

mgbam
/

DataBiz

Sleeping

App Files Files Community

mgbam committed on Jan 28

Commit

659fba8

verified ·

1 Parent(s): 16f65e2

Update app.py

Browse files

Files changed (1) hide show

app.py +40 -9

app.py CHANGED Viewed

@@ -11,13 +11,13 @@ import base64
 import io
 class GroqLLM:
-    """Compatible LLM interface for smolagents CodeAgent"""
     def __init__(self, model_name="llama-3.1-8B-Instant"):
         self.client = Groq(api_key=os.environ.get("GROQ_API_KEY"))
         self.model_name = model_name
     def __call__(self, prompt: Union[str, dict, List[Dict]]) -> str:
-        """Make the class callable as required by smolagents"""
         try:
             # Handle different prompt formats
             if isinstance(prompt, (dict, list)):
@@ -49,18 +49,18 @@ class GroqLLM:
             return error_msg
 class DataAnalysisAgent(CodeAgent):
-    """Extended CodeAgent with dataset awareness"""
     def __init__(self, dataset: pd.DataFrame, *args, **kwargs):
         super().__init__(*args, **kwargs)
         self._dataset = dataset
     @property
     def dataset(self) -> pd.DataFrame:
-        """Access the stored dataset"""
         return self._dataset
     def run(self, prompt: str) -> str:
-        """Override run method to include dataset context"""
         dataset_info = f"""
         Dataset Shape: {self.dataset.shape}
         Columns: {', '.join(self.dataset.columns)}
@@ -78,7 +78,15 @@ class DataAnalysisAgent(CodeAgent):
 @tool
 def analyze_basic_stats(data: pd.DataFrame) -> str:
-    """Calculate basic statistical measures for numerical columns in the dataset."""
     if data is None:
         data = tool.agent.dataset
@@ -98,7 +106,14 @@ def analyze_basic_stats(data: pd.DataFrame) -> str:
 @tool
 def generate_correlation_matrix(data: pd.DataFrame) -> str:
-    """Generate a visual correlation matrix for numerical columns in the dataset."""
     if data is None:
         data = tool.agent.dataset
@@ -115,7 +130,15 @@ def generate_correlation_matrix(data: pd.DataFrame) -> str:
 @tool
 def analyze_categorical_columns(data: pd.DataFrame) -> str:
-    """Analyze categorical columns in the dataset for distribution and frequencies."""
     if data is None:
         data = tool.agent.dataset
@@ -133,7 +156,15 @@ def analyze_categorical_columns(data: pd.DataFrame) -> str:
 @tool
 def suggest_features(data: pd.DataFrame) -> str:
-    """Suggest potential feature engineering steps based on data characteristics."""
     if data is None:
         data = tool.agent.dataset

 import io
 class GroqLLM:
+    """Compatible LLM interface for smolagents CodeAgent."""
     def __init__(self, model_name="llama-3.1-8B-Instant"):
         self.client = Groq(api_key=os.environ.get("GROQ_API_KEY"))
         self.model_name = model_name
     def __call__(self, prompt: Union[str, dict, List[Dict]]) -> str:
+        """Make the class callable as required by smolagents."""
         try:
             # Handle different prompt formats
             if isinstance(prompt, (dict, list)):
             return error_msg
 class DataAnalysisAgent(CodeAgent):
+    """Extended CodeAgent with dataset awareness."""
     def __init__(self, dataset: pd.DataFrame, *args, **kwargs):
         super().__init__(*args, **kwargs)
         self._dataset = dataset
     @property
     def dataset(self) -> pd.DataFrame:
+        """Access the stored dataset."""
         return self._dataset
     def run(self, prompt: str) -> str:
+        """Override run method to include dataset context."""
         dataset_info = f"""
         Dataset Shape: {self.dataset.shape}
         Columns: {', '.join(self.dataset.columns)}
 @tool
 def analyze_basic_stats(data: pd.DataFrame) -> str:
+    """Calculate basic statistical measures for numerical columns in the dataset.
+    Args:
+        data (pd.DataFrame): The dataset to analyze. It should contain at least one numerical column.
+    Returns:
+        str: A string containing formatted basic statistics for each numerical column,
+            including mean, median, standard deviation, skewness, and missing value counts.
+    """
     if data is None:
         data = tool.agent.dataset
 @tool
 def generate_correlation_matrix(data: pd.DataFrame) -> str:
+    """Generate a visual correlation matrix for numerical columns in the dataset.
+    Args:
+        data (pd.DataFrame): The dataset to analyze. It should contain at least two numerical columns.
+    Returns:
+        str: A base64 encoded string representing the correlation matrix plot image.
+    """
     if data is None:
         data = tool.agent.dataset
 @tool
 def analyze_categorical_columns(data: pd.DataFrame) -> str:
+    """Analyze categorical columns in the dataset for distribution and frequencies.
+    Args:
+        data (pd.DataFrame): The dataset to analyze. It should contain at least one categorical column.
+    Returns:
+        str: A string containing formatted analysis results for each categorical column,
+            including unique value counts, top categories, and missing value counts.
+    """
     if data is None:
         data = tool.agent.dataset
 @tool
 def suggest_features(data: pd.DataFrame) -> str:
+    """Suggest potential feature engineering steps based on data characteristics.
+    Args:
+        data (pd.DataFrame): The dataset to analyze. It can contain both numerical and categorical columns.
+    Returns:
+        str: A string containing suggestions for feature engineering based on
+            the characteristics of the input data.
+    """
     if data is None:
         data = tool.agent.dataset