Spaces:

mgbam
/

DataBiz

Sleeping

App Files Community

mgbam committed on Jan 28

Commit

1ae6347

verified ·

1 Parent(s): f60d18c

Update app.py

Browse files

Files changed (1) hide show

app.py +104 -69

app.py CHANGED Viewed

@@ -31,8 +31,10 @@ class GroqLLM:
         """
         Initialize the GroqLLM with a specified model.
-        Args:
-            model_name (str): The name of the language model to use.
         """
         self.client = Groq(api_key=os.environ.get("GROQ_API_KEY"))
         self.model_name = model_name
@@ -41,11 +43,15 @@ class GroqLLM:
         """
         Make the class callable as required by smolagents.
-        Args:
-            prompt (Union[str, dict, List[Dict]]): The input prompt for the language model.
-        Returns:
-            str: The generated response from the language model.
         """
         try:
             # Handle different prompt formats
@@ -83,10 +89,14 @@ class DataAnalysisAgent(CodeAgent):
         """
         Initialize the DataAnalysisAgent with the provided dataset.
-        Args:
-            dataset (pd.DataFrame): The dataset to analyze.
-            *args: Variable length argument list.
-            **kwargs: Arbitrary keyword arguments.
         """
         super().__init__(*args, **kwargs)
         self._dataset = dataset
@@ -96,8 +106,10 @@ class DataAnalysisAgent(CodeAgent):
     def dataset(self) -> pd.DataFrame:
         """Access the stored dataset.
-        Returns:
-            pd.DataFrame: The dataset stored in the agent.
         """
         return self._dataset
@@ -105,11 +117,15 @@ class DataAnalysisAgent(CodeAgent):
         """
         Override the run method to include dataset context and support predictive tasks.
-        Args:
-            prompt (str): The task prompt for analysis.
-        Returns:
-            str: The result of the analysis.
         """
         dataset_info = f"""
         Dataset Shape: {self.dataset.shape}
@@ -140,15 +156,18 @@ def analyze_basic_stats(data: Optional[pd.DataFrame] = None) -> str:
     columns in the provided DataFrame. It also generates a bar chart visualizing
     the mean, median, and standard deviation for each numerical feature.
-    Args:
-        data (Optional[pd.DataFrame]):
-            A pandas DataFrame containing the dataset to analyze.
-            If None, the agent's stored dataset will be used.
-            The DataFrame should contain at least one numerical column
-            for meaningful analysis.
-    Returns:
-        str: A markdown-formatted string containing the statistics and the generated plot.
     """
     if data is None:
         data = tool.agent.dataset
@@ -194,15 +213,18 @@ def generate_correlation_matrix(data: Optional[pd.DataFrame] = None) -> str:
     all numerical columns in the dataset. Users can hover over cells to see correlation values
     and interact with the plot (zoom, pan).
-    Args:
-        data (Optional[pd.DataFrame]):
-            A pandas DataFrame containing the dataset to analyze.
-            If None, the agent's stored dataset will be used.
-            The DataFrame should contain at least two numerical columns
-            for correlation analysis.
-    Returns:
-        str: An HTML string representing the interactive correlation matrix plot.
     """
     if data is None:
         data = tool.agent.dataset
@@ -232,15 +254,18 @@ def analyze_categorical_columns(data: Optional[pd.DataFrame] = None) -> str:
     and missing value counts. It also generates bar charts for the top 5 categories in each
     categorical feature.
-    Args:
-        data (Optional[pd.DataFrame]):
-            A pandas DataFrame containing the dataset to analyze.
-            If None, the agent's stored dataset will be used.
-            The DataFrame should contain at least one categorical column
-            for meaningful analysis.
-    Returns:
-        str: A markdown-formatted string containing analysis results and embedded plots.
     """
     if data is None:
         data = tool.agent.dataset
@@ -288,15 +313,18 @@ def suggest_features(data: Optional[pd.DataFrame] = None) -> str:
     This function analyzes the dataset's structure and statistical properties to
     recommend possible feature engineering steps that could improve model performance.
-    Args:
-        data (Optional[pd.DataFrame]):
-            A pandas DataFrame containing the dataset to analyze.
-            If None, the agent's stored dataset will be used.
-            The DataFrame can contain both numerical and categorical columns.
-    Returns:
-        str: A string containing suggestions for feature engineering based on
-             the characteristics of the input data.
     """
     if data is None:
         data = tool.agent.dataset
@@ -337,18 +365,21 @@ def predictive_analysis(data: Optional[pd.DataFrame] = None, target: Optional[st
     This function builds a classification model using Random Forest, evaluates its performance,
     and provides detailed metrics and visualizations such as the confusion matrix and ROC curve.
-    Args:
-        data (Optional[pd.DataFrame]):
-            A pandas DataFrame containing the dataset to analyze.
-            If None, the agent's stored dataset will be used.
-            The DataFrame should contain the target variable for prediction.
-        target (Optional[str]):
-            The name of the target variable column in the dataset.
-            If None, the agent must provide the target variable through the prompt.
-    Returns:
-        str: A markdown-formatted string containing the classification report, confusion matrix,
-             ROC curve, AUC score, and a unique Model ID.
     """
     if data is None:
         data = tool.agent.dataset
@@ -450,12 +481,16 @@ def export_report(content: str, filename: str):
     This function converts markdown content into a PDF file using pdfkit and provides
     a download button for users to obtain the report.
-    Args:
-        content (str): The markdown content to be included in the PDF report.
-        filename (str): The desired name for the exported PDF file.
-    Returns:
-        None
     """
     # Save content to a temporary HTML file
     with tempfile.NamedTemporaryFile(delete=False, suffix='.html') as tmp_file:

         """
         Initialize the GroqLLM with a specified model.
+        Parameters
+        ----------
+        model_name : str
+            The name of the language model to use.
         """
         self.client = Groq(api_key=os.environ.get("GROQ_API_KEY"))
         self.model_name = model_name
         """
         Make the class callable as required by smolagents.
+        Parameters
+        ----------
+        prompt : Union[str, dict, List[Dict]]
+            The input prompt for the language model.
+        Returns
+        -------
+        str
+            The generated response from the language model.
         """
         try:
             # Handle different prompt formats
         """
         Initialize the DataAnalysisAgent with the provided dataset.
+        Parameters
+        ----------
+        dataset : pd.DataFrame
+            The dataset to analyze.
+        *args : tuple
+            Variable length argument list.
+        **kwargs : dict
+            Arbitrary keyword arguments.
         """
         super().__init__(*args, **kwargs)
         self._dataset = dataset
     def dataset(self) -> pd.DataFrame:
         """Access the stored dataset.
+        Returns
+        -------
+        pd.DataFrame
+            The dataset stored in the agent.
         """
         return self._dataset
         """
         Override the run method to include dataset context and support predictive tasks.
+        Parameters
+        ----------
+        prompt : str
+            The task prompt for analysis.
+        Returns
+        -------
+        str
+            The result of the analysis.
         """
         dataset_info = f"""
         Dataset Shape: {self.dataset.shape}
     columns in the provided DataFrame. It also generates a bar chart visualizing
     the mean, median, and standard deviation for each numerical feature.
+    Parameters
+    ----------
+    data : Optional[pd.DataFrame], optional
+        A pandas DataFrame containing the dataset to analyze.
+        If None, the agent's stored dataset will be used.
+        The DataFrame should contain at least one numerical column
+        for meaningful analysis.
+    Returns
+    -------
+    str
+        A markdown-formatted string containing the statistics and the generated plot.
     """
     if data is None:
         data = tool.agent.dataset
     all numerical columns in the dataset. Users can hover over cells to see correlation values
     and interact with the plot (zoom, pan).
+    Parameters
+    ----------
+    data : Optional[pd.DataFrame], optional
+        A pandas DataFrame containing the dataset to analyze.
+        If None, the agent's stored dataset will be used.
+        The DataFrame should contain at least two numerical columns
+        for correlation analysis.
+    Returns
+    -------
+    str
+        An HTML string representing the interactive correlation matrix plot.
     """
     if data is None:
         data = tool.agent.dataset
     and missing value counts. It also generates bar charts for the top 5 categories in each
     categorical feature.
+    Parameters
+    ----------
+    data : Optional[pd.DataFrame], optional
+        A pandas DataFrame containing the dataset to analyze.
+        If None, the agent's stored dataset will be used.
+        The DataFrame should contain at least one categorical column
+        for meaningful analysis.
+    Returns
+    -------
+    str
+        A markdown-formatted string containing analysis results and embedded plots.
     """
     if data is None:
         data = tool.agent.dataset
     This function analyzes the dataset's structure and statistical properties to
     recommend possible feature engineering steps that could improve model performance.
+    Parameters
+    ----------
+    data : Optional[pd.DataFrame], optional
+        A pandas DataFrame containing the dataset to analyze.
+        If None, the agent's stored dataset will be used.
+        The DataFrame can contain both numerical and categorical columns.
+    Returns
+    -------
+    str
+        A string containing suggestions for feature engineering based on
+        the characteristics of the input data.
     """
     if data is None:
         data = tool.agent.dataset
     This function builds a classification model using Random Forest, evaluates its performance,
     and provides detailed metrics and visualizations such as the confusion matrix and ROC curve.
+    Parameters
+    ----------
+    data : Optional[pd.DataFrame], optional
+        A pandas DataFrame containing the dataset to analyze.
+        If None, the agent's stored dataset will be used.
+        The DataFrame should contain the target variable for prediction.
+    target : Optional[str], optional
+        The name of the target variable column in the dataset.
+        If None, the agent must provide the target variable through the prompt.
+    Returns
+    -------
+    str
+        A markdown-formatted string containing the classification report, confusion matrix,
+        ROC curve, AUC score, and a unique Model ID.
     """
     if data is None:
         data = tool.agent.dataset
     This function converts markdown content into a PDF file using pdfkit and provides
     a download button for users to obtain the report.
+    Parameters
+    ----------
+    content : str
+        The markdown content to be included in the PDF report.
+    filename : str
+        The desired name for the exported PDF file.
+    Returns
+    -------
+    None
     """
     # Save content to a temporary HTML file
     with tempfile.NamedTemporaryFile(delete=False, suffix='.html') as tmp_file: