Spaces:

mgbam
/

DataBiz

Sleeping

App Files Community

mgbam committed on Jan 28

Commit

a9bdee1

verified ·

1 Parent(s): 1ae6347

Update app.py

Browse files

Files changed (1) hide show

app.py +73 -108

app.py CHANGED Viewed

@@ -26,32 +26,26 @@ import uuid  # For generating unique report IDs
 # ------------------------------
 class GroqLLM:
     """Enhanced LLM interface with support for generating natural language summaries."""
     def __init__(self, model_name: str = "llama-3.1-8B-Instant"):
         """
         Initialize the GroqLLM with a specified model.
-        Parameters
-        ----------
-        model_name : str
-            The name of the language model to use.
         """
         self.client = Groq(api_key=os.environ.get("GROQ_API_KEY"))
         self.model_name = model_name
     def __call__(self, prompt: Union[str, dict, List[Dict]]) -> str:
         """
         Make the class callable as required by smolagents.
-        Parameters
-        ----------
-        prompt : Union[str, dict, List[Dict]]
-            The input prompt for the language model.
-        Returns
-        -------
-        str
-            The generated response from the language model.
         """
         try:
             # Handle different prompt formats
@@ -84,32 +78,26 @@ class GroqLLM:
 # ------------------------------
 class DataAnalysisAgent(CodeAgent):
     """Extended CodeAgent with dataset awareness and predictive analytics capabilities."""
     def __init__(self, dataset: pd.DataFrame, *args, **kwargs):
         """
         Initialize the DataAnalysisAgent with the provided dataset.
-        Parameters
-        ----------
-        dataset : pd.DataFrame
-            The dataset to analyze.
-        *args : tuple
-            Variable length argument list.
-        **kwargs : dict
-            Arbitrary keyword arguments.
         """
         super().__init__(*args, **kwargs)
         self._dataset = dataset
         self.models = {}  # To store trained models
     @property
     def dataset(self) -> pd.DataFrame:
         """Access the stored dataset.
-        Returns
-        -------
-        pd.DataFrame
-            The dataset stored in the agent.
         """
         return self._dataset
@@ -117,15 +105,11 @@ class DataAnalysisAgent(CodeAgent):
         """
         Override the run method to include dataset context and support predictive tasks.
-        Parameters
-        ----------
-        prompt : str
-            The task prompt for analysis.
-        Returns
-        -------
-        str
-            The result of the analysis.
         """
         dataset_info = f"""
         Dataset Shape: {self.dataset.shape}
@@ -156,18 +140,15 @@ def analyze_basic_stats(data: Optional[pd.DataFrame] = None) -> str:
     columns in the provided DataFrame. It also generates a bar chart visualizing
     the mean, median, and standard deviation for each numerical feature.
-    Parameters
-    ----------
-    data : Optional[pd.DataFrame], optional
-        A pandas DataFrame containing the dataset to analyze.
-        If None, the agent's stored dataset will be used.
-        The DataFrame should contain at least one numerical column
-        for meaningful analysis.
-    Returns
-    -------
-    str
-        A markdown-formatted string containing the statistics and the generated plot.
     """
     if data is None:
         data = tool.agent.dataset
@@ -213,18 +194,15 @@ def generate_correlation_matrix(data: Optional[pd.DataFrame] = None) -> str:
     all numerical columns in the dataset. Users can hover over cells to see correlation values
     and interact with the plot (zoom, pan).
-    Parameters
-    ----------
-    data : Optional[pd.DataFrame], optional
-        A pandas DataFrame containing the dataset to analyze.
-        If None, the agent's stored dataset will be used.
-        The DataFrame should contain at least two numerical columns
-        for correlation analysis.
-    Returns
-    -------
-    str
-        An HTML string representing the interactive correlation matrix plot.
     """
     if data is None:
         data = tool.agent.dataset
@@ -254,18 +232,15 @@ def analyze_categorical_columns(data: Optional[pd.DataFrame] = None) -> str:
     and missing value counts. It also generates bar charts for the top 5 categories in each
     categorical feature.
-    Parameters
-    ----------
-    data : Optional[pd.DataFrame], optional
-        A pandas DataFrame containing the dataset to analyze.
-        If None, the agent's stored dataset will be used.
-        The DataFrame should contain at least one categorical column
-        for meaningful analysis.
-    Returns
-    -------
-    str
-        A markdown-formatted string containing analysis results and embedded plots.
     """
     if data is None:
         data = tool.agent.dataset
@@ -313,18 +288,15 @@ def suggest_features(data: Optional[pd.DataFrame] = None) -> str:
     This function analyzes the dataset's structure and statistical properties to
     recommend possible feature engineering steps that could improve model performance.
-    Parameters
-    ----------
-    data : Optional[pd.DataFrame], optional
-        A pandas DataFrame containing the dataset to analyze.
-        If None, the agent's stored dataset will be used.
-        The DataFrame can contain both numerical and categorical columns.
-    Returns
-    -------
-    str
-        A string containing suggestions for feature engineering based on
-        the characteristics of the input data.
     """
     if data is None:
         data = tool.agent.dataset
@@ -365,21 +337,18 @@ def predictive_analysis(data: Optional[pd.DataFrame] = None, target: Optional[st
     This function builds a classification model using Random Forest, evaluates its performance,
     and provides detailed metrics and visualizations such as the confusion matrix and ROC curve.
-    Parameters
-    ----------
-    data : Optional[pd.DataFrame], optional
-        A pandas DataFrame containing the dataset to analyze.
-        If None, the agent's stored dataset will be used.
-        The DataFrame should contain the target variable for prediction.
-    target : Optional[str], optional
-        The name of the target variable column in the dataset.
-        If None, the agent must provide the target variable through the prompt.
-    Returns
-    -------
-    str
-        A markdown-formatted string containing the classification report, confusion matrix,
-        ROC curve, AUC score, and a unique Model ID.
     """
     if data is None:
         data = tool.agent.dataset
@@ -481,16 +450,12 @@ def export_report(content: str, filename: str):
     This function converts markdown content into a PDF file using pdfkit and provides
     a download button for users to obtain the report.
-    Parameters
-    ----------
-    content : str
-        The markdown content to be included in the PDF report.
-    filename : str
-        The desired name for the exported PDF file.
-    Returns
-    -------
-    None
     """
     # Save content to a temporary HTML file
     with tempfile.NamedTemporaryFile(delete=False, suffix='.html') as tmp_file:

 # ------------------------------
 class GroqLLM:
     """Enhanced LLM interface with support for generating natural language summaries."""
     def __init__(self, model_name: str = "llama-3.1-8B-Instant"):
         """
         Initialize the GroqLLM with a specified model.
+        Args:
+            model_name (str): The name of the language model to use.
         """
         self.client = Groq(api_key=os.environ.get("GROQ_API_KEY"))
         self.model_name = model_name
     def __call__(self, prompt: Union[str, dict, List[Dict]]) -> str:
         """
         Make the class callable as required by smolagents.
+        Args:
+            prompt (Union[str, dict, List[Dict]]): The input prompt for the language model.
+        Returns:
+            str: The generated response from the language model.
         """
         try:
             # Handle different prompt formats
 # ------------------------------
 class DataAnalysisAgent(CodeAgent):
     """Extended CodeAgent with dataset awareness and predictive analytics capabilities."""
     def __init__(self, dataset: pd.DataFrame, *args, **kwargs):
         """
         Initialize the DataAnalysisAgent with the provided dataset.
+        Args:
+            dataset (pd.DataFrame): The dataset to analyze.
+            *args: Variable length argument list.
+            **kwargs: Arbitrary keyword arguments.
         """
         super().__init__(*args, **kwargs)
         self._dataset = dataset
         self.models = {}  # To store trained models
     @property
     def dataset(self) -> pd.DataFrame:
         """Access the stored dataset.
+        Returns:
+            pd.DataFrame: The dataset stored in the agent.
         """
         return self._dataset
         """
         Override the run method to include dataset context and support predictive tasks.
+        Args:
+            prompt (str): The task prompt for analysis.
+        Returns:
+            str: The result of the analysis.
         """
         dataset_info = f"""
         Dataset Shape: {self.dataset.shape}
     columns in the provided DataFrame. It also generates a bar chart visualizing
     the mean, median, and standard deviation for each numerical feature.
+    Args:
+        data (Optional[pd.DataFrame], optional):
+            A pandas DataFrame containing the dataset to analyze.
+            If None, the agent's stored dataset will be used.
+            The DataFrame should contain at least one numerical column
+            for meaningful analysis.
+    Returns:
+        str: A markdown-formatted string containing the statistics and the generated plot.
     """
     if data is None:
         data = tool.agent.dataset
     all numerical columns in the dataset. Users can hover over cells to see correlation values
     and interact with the plot (zoom, pan).
+    Args:
+        data (Optional[pd.DataFrame], optional):
+            A pandas DataFrame containing the dataset to analyze.
+            If None, the agent's stored dataset will be used.
+            The DataFrame should contain at least two numerical columns
+            for correlation analysis.
+    Returns:
+        str: An HTML string representing the interactive correlation matrix plot.
     """
     if data is None:
         data = tool.agent.dataset
     and missing value counts. It also generates bar charts for the top 5 categories in each
     categorical feature.
+    Args:
+        data (Optional[pd.DataFrame], optional):
+            A pandas DataFrame containing the dataset to analyze.
+            If None, the agent's stored dataset will be used.
+            The DataFrame should contain at least one categorical column
+            for meaningful analysis.
+    Returns:
+        str: A markdown-formatted string containing analysis results and embedded plots.
     """
     if data is None:
         data = tool.agent.dataset
     This function analyzes the dataset's structure and statistical properties to
     recommend possible feature engineering steps that could improve model performance.
+    Args:
+        data (Optional[pd.DataFrame], optional):
+            A pandas DataFrame containing the dataset to analyze.
+            If None, the agent's stored dataset will be used.
+            The DataFrame can contain both numerical and categorical columns.
+    Returns:
+        str: A string containing suggestions for feature engineering based on
+             the characteristics of the input data.
     """
     if data is None:
         data = tool.agent.dataset
     This function builds a classification model using Random Forest, evaluates its performance,
     and provides detailed metrics and visualizations such as the confusion matrix and ROC curve.
+    Args:
+        data (Optional[pd.DataFrame], optional):
+            A pandas DataFrame containing the dataset to analyze.
+            If None, the agent's stored dataset will be used.
+            The DataFrame should contain the target variable for prediction.
+        target (Optional[str], optional):
+            The name of the target variable column in the dataset.
+            If None, the agent must provide the target variable through the prompt.
+    Returns:
+        str: A markdown-formatted string containing the classification report, confusion matrix,
+             ROC curve, AUC score, and a unique Model ID.
     """
     if data is None:
         data = tool.agent.dataset
     This function converts markdown content into a PDF file using pdfkit and provides
     a download button for users to obtain the report.
+    Args:
+        content (str): The markdown content to be included in the PDF report.
+        filename (str): The desired name for the exported PDF file.
+    Returns:
+        None
     """
     # Save content to a temporary HTML file
     with tempfile.NamedTemporaryFile(delete=False, suffix='.html') as tmp_file: