mgbam committed on
Commit
1ae6347
·
verified ·
1 Parent(s): f60d18c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +104 -69
app.py CHANGED
@@ -31,8 +31,10 @@ class GroqLLM:
31
  """
32
  Initialize the GroqLLM with a specified model.
33
 
34
- Args:
35
- model_name (str): The name of the language model to use.
 
 
36
  """
37
  self.client = Groq(api_key=os.environ.get("GROQ_API_KEY"))
38
  self.model_name = model_name
@@ -41,11 +43,15 @@ class GroqLLM:
41
  """
42
  Make the class callable as required by smolagents.
43
 
44
- Args:
45
- prompt (Union[str, dict, List[Dict]]): The input prompt for the language model.
 
 
46
 
47
- Returns:
48
- str: The generated response from the language model.
 
 
49
  """
50
  try:
51
  # Handle different prompt formats
@@ -83,10 +89,14 @@ class DataAnalysisAgent(CodeAgent):
83
  """
84
  Initialize the DataAnalysisAgent with the provided dataset.
85
 
86
- Args:
87
- dataset (pd.DataFrame): The dataset to analyze.
88
- *args: Variable length argument list.
89
- **kwargs: Arbitrary keyword arguments.
 
 
 
 
90
  """
91
  super().__init__(*args, **kwargs)
92
  self._dataset = dataset
@@ -96,8 +106,10 @@ class DataAnalysisAgent(CodeAgent):
96
  def dataset(self) -> pd.DataFrame:
97
  """Access the stored dataset.
98
 
99
- Returns:
100
- pd.DataFrame: The dataset stored in the agent.
 
 
101
  """
102
  return self._dataset
103
 
@@ -105,11 +117,15 @@ class DataAnalysisAgent(CodeAgent):
105
  """
106
  Override the run method to include dataset context and support predictive tasks.
107
 
108
- Args:
109
- prompt (str): The task prompt for analysis.
 
 
110
 
111
- Returns:
112
- str: The result of the analysis.
 
 
113
  """
114
  dataset_info = f"""
115
  Dataset Shape: {self.dataset.shape}
@@ -140,15 +156,18 @@ def analyze_basic_stats(data: Optional[pd.DataFrame] = None) -> str:
140
  columns in the provided DataFrame. It also generates a bar chart visualizing
141
  the mean, median, and standard deviation for each numerical feature.
142
 
143
- Args:
144
- data (Optional[pd.DataFrame]):
145
- A pandas DataFrame containing the dataset to analyze.
146
- If None, the agent's stored dataset will be used.
147
- The DataFrame should contain at least one numerical column
148
- for meaningful analysis.
149
-
150
- Returns:
151
- str: A markdown-formatted string containing the statistics and the generated plot.
 
 
 
152
  """
153
  if data is None:
154
  data = tool.agent.dataset
@@ -194,15 +213,18 @@ def generate_correlation_matrix(data: Optional[pd.DataFrame] = None) -> str:
194
  all numerical columns in the dataset. Users can hover over cells to see correlation values
195
  and interact with the plot (zoom, pan).
196
 
197
- Args:
198
- data (Optional[pd.DataFrame]):
199
- A pandas DataFrame containing the dataset to analyze.
200
- If None, the agent's stored dataset will be used.
201
- The DataFrame should contain at least two numerical columns
202
- for correlation analysis.
203
-
204
- Returns:
205
- str: An HTML string representing the interactive correlation matrix plot.
 
 
 
206
  """
207
  if data is None:
208
  data = tool.agent.dataset
@@ -232,15 +254,18 @@ def analyze_categorical_columns(data: Optional[pd.DataFrame] = None) -> str:
232
  and missing value counts. It also generates bar charts for the top 5 categories in each
233
  categorical feature.
234
 
235
- Args:
236
- data (Optional[pd.DataFrame]):
237
- A pandas DataFrame containing the dataset to analyze.
238
- If None, the agent's stored dataset will be used.
239
- The DataFrame should contain at least one categorical column
240
- for meaningful analysis.
241
-
242
- Returns:
243
- str: A markdown-formatted string containing analysis results and embedded plots.
 
 
 
244
  """
245
  if data is None:
246
  data = tool.agent.dataset
@@ -288,15 +313,18 @@ def suggest_features(data: Optional[pd.DataFrame] = None) -> str:
288
  This function analyzes the dataset's structure and statistical properties to
289
  recommend possible feature engineering steps that could improve model performance.
290
 
291
- Args:
292
- data (Optional[pd.DataFrame]):
293
- A pandas DataFrame containing the dataset to analyze.
294
- If None, the agent's stored dataset will be used.
295
- The DataFrame can contain both numerical and categorical columns.
296
-
297
- Returns:
298
- str: A string containing suggestions for feature engineering based on
299
- the characteristics of the input data.
 
 
 
300
  """
301
  if data is None:
302
  data = tool.agent.dataset
@@ -337,18 +365,21 @@ def predictive_analysis(data: Optional[pd.DataFrame] = None, target: Optional[st
337
  This function builds a classification model using Random Forest, evaluates its performance,
338
  and provides detailed metrics and visualizations such as the confusion matrix and ROC curve.
339
 
340
- Args:
341
- data (Optional[pd.DataFrame]):
342
- A pandas DataFrame containing the dataset to analyze.
343
- If None, the agent's stored dataset will be used.
344
- The DataFrame should contain the target variable for prediction.
345
- target (Optional[str]):
346
- The name of the target variable column in the dataset.
347
- If None, the agent must provide the target variable through the prompt.
348
-
349
- Returns:
350
- str: A markdown-formatted string containing the classification report, confusion matrix,
351
- ROC curve, AUC score, and a unique Model ID.
 
 
 
352
  """
353
  if data is None:
354
  data = tool.agent.dataset
@@ -450,12 +481,16 @@ def export_report(content: str, filename: str):
450
  This function converts markdown content into a PDF file using pdfkit and provides
451
  a download button for users to obtain the report.
452
 
453
- Args:
454
- content (str): The markdown content to be included in the PDF report.
455
- filename (str): The desired name for the exported PDF file.
 
 
 
456
 
457
- Returns:
458
- None
 
459
  """
460
  # Save content to a temporary HTML file
461
  with tempfile.NamedTemporaryFile(delete=False, suffix='.html') as tmp_file:
 
31
  """
32
  Initialize the GroqLLM with a specified model.
33
 
34
+ Parameters
35
+ ----------
36
+ model_name : str
37
+ The name of the language model to use.
38
  """
39
  self.client = Groq(api_key=os.environ.get("GROQ_API_KEY"))
40
  self.model_name = model_name
 
43
  """
44
  Make the class callable as required by smolagents.
45
 
46
+ Parameters
47
+ ----------
48
+ prompt : Union[str, dict, List[Dict]]
49
+ The input prompt for the language model.
50
 
51
+ Returns
52
+ -------
53
+ str
54
+ The generated response from the language model.
55
  """
56
  try:
57
  # Handle different prompt formats
 
89
  """
90
  Initialize the DataAnalysisAgent with the provided dataset.
91
 
92
+ Parameters
93
+ ----------
94
+ dataset : pd.DataFrame
95
+ The dataset to analyze.
96
+ *args : tuple
97
+ Variable length argument list.
98
+ **kwargs : dict
99
+ Arbitrary keyword arguments.
100
  """
101
  super().__init__(*args, **kwargs)
102
  self._dataset = dataset
 
106
  def dataset(self) -> pd.DataFrame:
107
  """Access the stored dataset.
108
 
109
+ Returns
110
+ -------
111
+ pd.DataFrame
112
+ The dataset stored in the agent.
113
  """
114
  return self._dataset
115
 
 
117
  """
118
  Override the run method to include dataset context and support predictive tasks.
119
 
120
+ Parameters
121
+ ----------
122
+ prompt : str
123
+ The task prompt for analysis.
124
 
125
+ Returns
126
+ -------
127
+ str
128
+ The result of the analysis.
129
  """
130
  dataset_info = f"""
131
  Dataset Shape: {self.dataset.shape}
 
156
  columns in the provided DataFrame. It also generates a bar chart visualizing
157
  the mean, median, and standard deviation for each numerical feature.
158
 
159
+ Parameters
160
+ ----------
161
+ data : Optional[pd.DataFrame], optional
162
+ A pandas DataFrame containing the dataset to analyze.
163
+ If None, the agent's stored dataset will be used.
164
+ The DataFrame should contain at least one numerical column
165
+ for meaningful analysis.
166
+
167
+ Returns
168
+ -------
169
+ str
170
+ A markdown-formatted string containing the statistics and the generated plot.
171
  """
172
  if data is None:
173
  data = tool.agent.dataset
 
213
  all numerical columns in the dataset. Users can hover over cells to see correlation values
214
  and interact with the plot (zoom, pan).
215
 
216
+ Parameters
217
+ ----------
218
+ data : Optional[pd.DataFrame], optional
219
+ A pandas DataFrame containing the dataset to analyze.
220
+ If None, the agent's stored dataset will be used.
221
+ The DataFrame should contain at least two numerical columns
222
+ for correlation analysis.
223
+
224
+ Returns
225
+ -------
226
+ str
227
+ An HTML string representing the interactive correlation matrix plot.
228
  """
229
  if data is None:
230
  data = tool.agent.dataset
 
254
  and missing value counts. It also generates bar charts for the top 5 categories in each
255
  categorical feature.
256
 
257
+ Parameters
258
+ ----------
259
+ data : Optional[pd.DataFrame], optional
260
+ A pandas DataFrame containing the dataset to analyze.
261
+ If None, the agent's stored dataset will be used.
262
+ The DataFrame should contain at least one categorical column
263
+ for meaningful analysis.
264
+
265
+ Returns
266
+ -------
267
+ str
268
+ A markdown-formatted string containing analysis results and embedded plots.
269
  """
270
  if data is None:
271
  data = tool.agent.dataset
 
313
  This function analyzes the dataset's structure and statistical properties to
314
  recommend possible feature engineering steps that could improve model performance.
315
 
316
+ Parameters
317
+ ----------
318
+ data : Optional[pd.DataFrame], optional
319
+ A pandas DataFrame containing the dataset to analyze.
320
+ If None, the agent's stored dataset will be used.
321
+ The DataFrame can contain both numerical and categorical columns.
322
+
323
+ Returns
324
+ -------
325
+ str
326
+ A string containing suggestions for feature engineering based on
327
+ the characteristics of the input data.
328
  """
329
  if data is None:
330
  data = tool.agent.dataset
 
365
  This function builds a classification model using Random Forest, evaluates its performance,
366
  and provides detailed metrics and visualizations such as the confusion matrix and ROC curve.
367
 
368
+ Parameters
369
+ ----------
370
+ data : Optional[pd.DataFrame], optional
371
+ A pandas DataFrame containing the dataset to analyze.
372
+ If None, the agent's stored dataset will be used.
373
+ The DataFrame should contain the target variable for prediction.
374
+ target : Optional[str], optional
375
+ The name of the target variable column in the dataset.
376
+ If None, the agent must provide the target variable through the prompt.
377
+
378
+ Returns
379
+ -------
380
+ str
381
+ A markdown-formatted string containing the classification report, confusion matrix,
382
+ ROC curve, AUC score, and a unique Model ID.
383
  """
384
  if data is None:
385
  data = tool.agent.dataset
 
481
  This function converts markdown content into a PDF file using pdfkit and provides
482
  a download button for users to obtain the report.
483
 
484
+ Parameters
485
+ ----------
486
+ content : str
487
+ The markdown content to be included in the PDF report.
488
+ filename : str
489
+ The desired name for the exported PDF file.
490
 
491
+ Returns
492
+ -------
493
+ None
494
  """
495
  # Save content to a temporary HTML file
496
  with tempfile.NamedTemporaryFile(delete=False, suffix='.html') as tmp_file: