mgbam committed on
Commit
a9bdee1
·
verified ·
1 Parent(s): 1ae6347

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +73 -108
app.py CHANGED
@@ -26,32 +26,26 @@ import uuid # For generating unique report IDs
26
  # ------------------------------
27
  class GroqLLM:
28
  """Enhanced LLM interface with support for generating natural language summaries."""
29
-
30
  def __init__(self, model_name: str = "llama-3.1-8B-Instant"):
31
  """
32
  Initialize the GroqLLM with a specified model.
33
 
34
- Parameters
35
- ----------
36
- model_name : str
37
- The name of the language model to use.
38
  """
39
  self.client = Groq(api_key=os.environ.get("GROQ_API_KEY"))
40
  self.model_name = model_name
41
-
42
  def __call__(self, prompt: Union[str, dict, List[Dict]]) -> str:
43
  """
44
  Make the class callable as required by smolagents.
45
 
46
- Parameters
47
- ----------
48
- prompt : Union[str, dict, List[Dict]]
49
- The input prompt for the language model.
50
 
51
- Returns
52
- -------
53
- str
54
- The generated response from the language model.
55
  """
56
  try:
57
  # Handle different prompt formats
@@ -84,32 +78,26 @@ class GroqLLM:
84
  # ------------------------------
85
  class DataAnalysisAgent(CodeAgent):
86
  """Extended CodeAgent with dataset awareness and predictive analytics capabilities."""
87
-
88
  def __init__(self, dataset: pd.DataFrame, *args, **kwargs):
89
  """
90
  Initialize the DataAnalysisAgent with the provided dataset.
91
 
92
- Parameters
93
- ----------
94
- dataset : pd.DataFrame
95
- The dataset to analyze.
96
- *args : tuple
97
- Variable length argument list.
98
- **kwargs : dict
99
- Arbitrary keyword arguments.
100
  """
101
  super().__init__(*args, **kwargs)
102
  self._dataset = dataset
103
  self.models = {} # To store trained models
104
-
105
  @property
106
  def dataset(self) -> pd.DataFrame:
107
  """Access the stored dataset.
108
 
109
- Returns
110
- -------
111
- pd.DataFrame
112
- The dataset stored in the agent.
113
  """
114
  return self._dataset
115
 
@@ -117,15 +105,11 @@ class DataAnalysisAgent(CodeAgent):
117
  """
118
  Override the run method to include dataset context and support predictive tasks.
119
 
120
- Parameters
121
- ----------
122
- prompt : str
123
- The task prompt for analysis.
124
 
125
- Returns
126
- -------
127
- str
128
- The result of the analysis.
129
  """
130
  dataset_info = f"""
131
  Dataset Shape: {self.dataset.shape}
@@ -156,18 +140,15 @@ def analyze_basic_stats(data: Optional[pd.DataFrame] = None) -> str:
156
  columns in the provided DataFrame. It also generates a bar chart visualizing
157
  the mean, median, and standard deviation for each numerical feature.
158
 
159
- Parameters
160
- ----------
161
- data : Optional[pd.DataFrame], optional
162
- A pandas DataFrame containing the dataset to analyze.
163
- If None, the agent's stored dataset will be used.
164
- The DataFrame should contain at least one numerical column
165
- for meaningful analysis.
166
-
167
- Returns
168
- -------
169
- str
170
- A markdown-formatted string containing the statistics and the generated plot.
171
  """
172
  if data is None:
173
  data = tool.agent.dataset
@@ -213,18 +194,15 @@ def generate_correlation_matrix(data: Optional[pd.DataFrame] = None) -> str:
213
  all numerical columns in the dataset. Users can hover over cells to see correlation values
214
  and interact with the plot (zoom, pan).
215
 
216
- Parameters
217
- ----------
218
- data : Optional[pd.DataFrame], optional
219
- A pandas DataFrame containing the dataset to analyze.
220
- If None, the agent's stored dataset will be used.
221
- The DataFrame should contain at least two numerical columns
222
- for correlation analysis.
223
-
224
- Returns
225
- -------
226
- str
227
- An HTML string representing the interactive correlation matrix plot.
228
  """
229
  if data is None:
230
  data = tool.agent.dataset
@@ -254,18 +232,15 @@ def analyze_categorical_columns(data: Optional[pd.DataFrame] = None) -> str:
254
  and missing value counts. It also generates bar charts for the top 5 categories in each
255
  categorical feature.
256
 
257
- Parameters
258
- ----------
259
- data : Optional[pd.DataFrame], optional
260
- A pandas DataFrame containing the dataset to analyze.
261
- If None, the agent's stored dataset will be used.
262
- The DataFrame should contain at least one categorical column
263
- for meaningful analysis.
264
-
265
- Returns
266
- -------
267
- str
268
- A markdown-formatted string containing analysis results and embedded plots.
269
  """
270
  if data is None:
271
  data = tool.agent.dataset
@@ -313,18 +288,15 @@ def suggest_features(data: Optional[pd.DataFrame] = None) -> str:
313
  This function analyzes the dataset's structure and statistical properties to
314
  recommend possible feature engineering steps that could improve model performance.
315
 
316
- Parameters
317
- ----------
318
- data : Optional[pd.DataFrame], optional
319
- A pandas DataFrame containing the dataset to analyze.
320
- If None, the agent's stored dataset will be used.
321
- The DataFrame can contain both numerical and categorical columns.
322
-
323
- Returns
324
- -------
325
- str
326
- A string containing suggestions for feature engineering based on
327
- the characteristics of the input data.
328
  """
329
  if data is None:
330
  data = tool.agent.dataset
@@ -365,21 +337,18 @@ def predictive_analysis(data: Optional[pd.DataFrame] = None, target: Optional[st
365
  This function builds a classification model using Random Forest, evaluates its performance,
366
  and provides detailed metrics and visualizations such as the confusion matrix and ROC curve.
367
 
368
- Parameters
369
- ----------
370
- data : Optional[pd.DataFrame], optional
371
- A pandas DataFrame containing the dataset to analyze.
372
- If None, the agent's stored dataset will be used.
373
- The DataFrame should contain the target variable for prediction.
374
- target : Optional[str], optional
375
- The name of the target variable column in the dataset.
376
- If None, the agent must provide the target variable through the prompt.
377
-
378
- Returns
379
- -------
380
- str
381
- A markdown-formatted string containing the classification report, confusion matrix,
382
- ROC curve, AUC score, and a unique Model ID.
383
  """
384
  if data is None:
385
  data = tool.agent.dataset
@@ -481,16 +450,12 @@ def export_report(content: str, filename: str):
481
  This function converts markdown content into a PDF file using pdfkit and provides
482
  a download button for users to obtain the report.
483
 
484
- Parameters
485
- ----------
486
- content : str
487
- The markdown content to be included in the PDF report.
488
- filename : str
489
- The desired name for the exported PDF file.
490
 
491
- Returns
492
- -------
493
- None
494
  """
495
  # Save content to a temporary HTML file
496
  with tempfile.NamedTemporaryFile(delete=False, suffix='.html') as tmp_file:
 
26
  # ------------------------------
27
  class GroqLLM:
28
  """Enhanced LLM interface with support for generating natural language summaries."""
29
+
30
  def __init__(self, model_name: str = "llama-3.1-8B-Instant"):
31
  """
32
  Initialize the GroqLLM with a specified model.
33
 
34
+ Args:
35
+ model_name (str): The name of the language model to use.
 
 
36
  """
37
  self.client = Groq(api_key=os.environ.get("GROQ_API_KEY"))
38
  self.model_name = model_name
39
+
40
  def __call__(self, prompt: Union[str, dict, List[Dict]]) -> str:
41
  """
42
  Make the class callable as required by smolagents.
43
 
44
+ Args:
45
+ prompt (Union[str, dict, List[Dict]]): The input prompt for the language model.
 
 
46
 
47
+ Returns:
48
+ str: The generated response from the language model.
 
 
49
  """
50
  try:
51
  # Handle different prompt formats
 
78
  # ------------------------------
79
  class DataAnalysisAgent(CodeAgent):
80
  """Extended CodeAgent with dataset awareness and predictive analytics capabilities."""
81
+
82
  def __init__(self, dataset: pd.DataFrame, *args, **kwargs):
83
  """
84
  Initialize the DataAnalysisAgent with the provided dataset.
85
 
86
+ Args:
87
+ dataset (pd.DataFrame): The dataset to analyze.
88
+ *args: Variable length argument list.
89
+ **kwargs: Arbitrary keyword arguments.
 
 
 
 
90
  """
91
  super().__init__(*args, **kwargs)
92
  self._dataset = dataset
93
  self.models = {} # To store trained models
94
+
95
  @property
96
  def dataset(self) -> pd.DataFrame:
97
  """Access the stored dataset.
98
 
99
+ Returns:
100
+ pd.DataFrame: The dataset stored in the agent.
 
 
101
  """
102
  return self._dataset
103
 
 
105
  """
106
  Override the run method to include dataset context and support predictive tasks.
107
 
108
+ Args:
109
+ prompt (str): The task prompt for analysis.
 
 
110
 
111
+ Returns:
112
+ str: The result of the analysis.
 
 
113
  """
114
  dataset_info = f"""
115
  Dataset Shape: {self.dataset.shape}
 
140
  columns in the provided DataFrame. It also generates a bar chart visualizing
141
  the mean, median, and standard deviation for each numerical feature.
142
 
143
+ Args:
144
+ data (Optional[pd.DataFrame], optional):
145
+ A pandas DataFrame containing the dataset to analyze.
146
+ If None, the agent's stored dataset will be used.
147
+ The DataFrame should contain at least one numerical column
148
+ for meaningful analysis.
149
+
150
+ Returns:
151
+ str: A markdown-formatted string containing the statistics and the generated plot.
 
 
 
152
  """
153
  if data is None:
154
  data = tool.agent.dataset
 
194
  all numerical columns in the dataset. Users can hover over cells to see correlation values
195
  and interact with the plot (zoom, pan).
196
 
197
+ Args:
198
+ data (Optional[pd.DataFrame], optional):
199
+ A pandas DataFrame containing the dataset to analyze.
200
+ If None, the agent's stored dataset will be used.
201
+ The DataFrame should contain at least two numerical columns
202
+ for correlation analysis.
203
+
204
+ Returns:
205
+ str: An HTML string representing the interactive correlation matrix plot.
 
 
 
206
  """
207
  if data is None:
208
  data = tool.agent.dataset
 
232
  and missing value counts. It also generates bar charts for the top 5 categories in each
233
  categorical feature.
234
 
235
+ Args:
236
+ data (Optional[pd.DataFrame], optional):
237
+ A pandas DataFrame containing the dataset to analyze.
238
+ If None, the agent's stored dataset will be used.
239
+ The DataFrame should contain at least one categorical column
240
+ for meaningful analysis.
241
+
242
+ Returns:
243
+ str: A markdown-formatted string containing analysis results and embedded plots.
 
 
 
244
  """
245
  if data is None:
246
  data = tool.agent.dataset
 
288
  This function analyzes the dataset's structure and statistical properties to
289
  recommend possible feature engineering steps that could improve model performance.
290
 
291
+ Args:
292
+ data (Optional[pd.DataFrame], optional):
293
+ A pandas DataFrame containing the dataset to analyze.
294
+ If None, the agent's stored dataset will be used.
295
+ The DataFrame can contain both numerical and categorical columns.
296
+
297
+ Returns:
298
+ str: A string containing suggestions for feature engineering based on
299
+ the characteristics of the input data.
 
 
 
300
  """
301
  if data is None:
302
  data = tool.agent.dataset
 
337
  This function builds a classification model using Random Forest, evaluates its performance,
338
  and provides detailed metrics and visualizations such as the confusion matrix and ROC curve.
339
 
340
+ Args:
341
+ data (Optional[pd.DataFrame], optional):
342
+ A pandas DataFrame containing the dataset to analyze.
343
+ If None, the agent's stored dataset will be used.
344
+ The DataFrame should contain the target variable for prediction.
345
+ target (Optional[str], optional):
346
+ The name of the target variable column in the dataset.
347
+ If None, the agent must provide the target variable through the prompt.
348
+
349
+ Returns:
350
+ str: A markdown-formatted string containing the classification report, confusion matrix,
351
+ ROC curve, AUC score, and a unique Model ID.
 
 
 
352
  """
353
  if data is None:
354
  data = tool.agent.dataset
 
450
  This function converts markdown content into a PDF file using pdfkit and provides
451
  a download button for users to obtain the report.
452
 
453
+ Args:
454
+ content (str): The markdown content to be included in the PDF report.
455
+ filename (str): The desired name for the exported PDF file.
 
 
 
456
 
457
+ Returns:
458
+ None
 
459
  """
460
  # Save content to a temporary HTML file
461
  with tempfile.NamedTemporaryFile(delete=False, suffix='.html') as tmp_file: